예제 #1
0
        /// <summary>
        /// Builds a <see cref="ChattyPage"/> from the HTML of one chatty page: the current page number,
        /// the last page number (derived from the thread count shown on the page), and the threads
        /// parsed from the page body.
        /// </summary>
        /// <param name="html">HTML of the chatty page to parse.</param>
        /// <returns>The populated <see cref="ChattyPage"/>.</returns>
        /// <exception cref="ParsingException">More than 40 threads were parsed from the page.</exception>
        private ChattyPage ParseChattyPage(string html)
        {
            // Page size used both to derive the last page number and as a sanity limit on parsed threads.
            const int ThreadsPerPage = 40;

            _threadParser.CheckContentId(html);

            var result = new ChattyPage { Threads = new List<ChattyThread>() };
            var parser = new Parser(html);

            parser.Seek(1, "<div id=\"chatty_comments_wrap");

            // The page number stays at 1 unless both the navigation block and a selected-page link
            // are found ahead of the parser (Peek yielding -1 is treated as "not found").
            var currentPage = 1;
            if (parser.Peek(1, "<div class=\"pagenavigation\">") != -1)
            {
                parser.Seek(1, _pageNavigationStart);

                if (parser.Peek(1, "<a rel=\"nofollow\" class=\"selected_page\"") != -1)
                {
                    var currentPageText = parser.Clip(_pageCurrentPageStart, "</a>");
                    currentPage = int.Parse(currentPageText);
                }
            }
            result.CurrentPage = currentPage;

            parser.Seek(1, _pageChattySettingsStart);

            var threadCountText = parser.Clip(_pageNumThreadsStart, " Threads");
            var threadCount = int.Parse(threadCountText);

            // Round up to whole pages, but never report fewer than one page.
            var pageCount = Math.Ceiling(threadCount / (double)ThreadsPerPage);
            result.LastPage = (int)Math.Max(pageCount, 1);

            while (true)
            {
                if (parser.Peek(1, "<div class=\"fullpost") == -1)
                {
                    break;
                }

                try
                {
                    result.Threads.Add(_threadParser.ParseThreadTree(parser));
                }
                catch (MissingThreadException)
                {
                    // This can be fine: a page may exist only because it consists entirely of nuked
                    // threads that we cannot see, so stop reading threads and keep what we have.
                    break;
                }

                if (result.Threads.Count > ThreadsPerPage)
                {
                    throw new ParsingException("Too many threads. Something is wrong.");
                }
            }

            return result;
        }
예제 #2
0
        /// <summary>
        /// Downloads one page of the chatty, strips the parts of the HTML that change on every load,
        /// and parses the result unless it is identical to the previously seen HTML.
        /// </summary>
        /// <param name="stopwatch">Optional stopwatch; when non-null, a step is recorded before the download and before the parse.</param>
        /// <param name="page">Page number placed in the request URL's <c>page</c> query parameter.</param>
        /// <param name="previousHtml">Normalized HTML from an earlier call, used to detect an unchanged page.</param>
        /// <param name="previousChattyPage">Page object returned as-is when the HTML is unchanged.</param>
        /// <returns>
        /// The normalized HTML together with the page: <paramref name="previousChattyPage"/> (possibly null)
        /// when the normalized HTML equals <paramref name="previousHtml"/>, otherwise a freshly parsed page.
        /// </returns>
        public async Task <(string Html, ChattyPage Page)> GetChattyPage(StepStopwatch stopwatch,
                                                                         int page, string previousHtml = null, ChattyPage previousChattyPage = null)
        {
            stopwatch?.Step($"Download_{page}");
            var downloadedHtml = await _downloadService.DownloadWithSharedLogin(
                $"https://www.shacknews.com/chatty?page={page}");

            stopwatch?.Step($"Parse_{page}");

            // Drop the progress meter, which changes on every load, so that an unchanged page yields
            // identical HTML.
            var withoutProgressMeter = _progressMeterRegex.Replace(downloadedHtml, "");

            // Drop the comment count as well; it appears on all pages even when a given page has not
            // changed.
            var normalizedHtml = _commentCountRegex.Replace(withoutProgressMeter, "");

            if (normalizedHtml == previousHtml)
            {
                // Nothing changed since the previous download, so reuse the caller's page object.
                return (normalizedHtml, previousChattyPage);
            }

            return (normalizedHtml, ParseChattyPage(normalizedHtml));
        }