/// <summary>
/// Downloads the Shacknews thread frame for <paramref name="threadId"/> and parses
/// every post found in it.
/// </summary>
/// <param name="threadId">Root post ID, inserted into the <c>root</c> query parameter.</param>
/// <returns>
/// The posts in the order they were parsed. If the ID doesn't exist, the returned
/// list will be empty.
/// </returns>
/// <exception cref="ParsingException">
/// The downloaded HTML does not contain a closing <c>&lt;/html&gt;</c> tag.
/// </exception>
public async Task<List<ChattyPost>> GetThreadBodies(int threadId)
{
    var requestUrl = $"https://www.shacknews.com/frame_laryn.x?root={threadId}";
    var page = await _downloadService.DownloadWithSharedLogin(requestUrl, verifyLoginStatus: false);

    // A missing closing tag is treated as a truncated download.
    var isComplete = page.Contains("</html>", StringComparison.Ordinal);
    if (!isComplete)
    {
        throw new ParsingException("Shacknews thread tree HTML ended prematurely.");
    }

    var parser = new Parser(page);
    var posts = new List<ChattyPost>();

    while (true)
    {
        // Stop once no further post container is found.
        if (parser.Peek(1, "<div id=\"item_") == -1)
        {
            break;
        }

        var post = new ChattyPost();

        // NOTE(review): the Clip calls presumably advance the parser, so they are
        // kept in exactly this order — confirm against Parser before reordering.
        var idText = parser.Clip(_threadBodiesReplyIdStart, "\">");
        post.Id = int.Parse(idText);

        var categoryText = parser.Clip(new[] { "<div class=\"fullpost", "fpmod_", "_" }, " ");
        post.Category = V2ModerationFlagConverter.Parse(categoryText);

        // The author section holds the numeric author ID, optionally combined
        // with the "fpfrozen" marker.
        var authorSection = parser.Clip(_threadBodiesAuthorSectionStart, "\"");
        post.AuthorId = int.Parse(authorSection.Replace("fpfrozen", ""));
        post.IsFrozen = authorSection.Contains("fpfrozen", StringComparison.Ordinal);

        var authorHtml = parser.Clip(_threadBodiesReplyAuthorStart, "</a>");
        post.Author = HtmlDecodeExceptLtGt(authorHtml).Trim();

        var flairHtml = parser.Clip(_threadBodiesAuthorFlairStart, "</span>");
        post.AuthorFlair = ParseUserFlair(flairHtml);

        var bodyHtml = parser.Clip(_threadBodiesReplyBodyStart, "</div>");
        post.Body = MakeSpoilersClickable(HtmlDecodeExceptLtGt(RemoveNewlines(bodyHtml)));

        // The clip ends just before the trailing "T", so it is appended again
        // after tags and the "Flag" text are removed.
        var dateHtml = parser.Clip(_threadBodiesReplyDateStart, "T</div");
        var dateText = StripTags(dateHtml).Replace("Flag", "") + "T";
        post.Date = DateParser.Parse(dateText);

        posts.Add(post);
    }

    return posts;
}
/// <summary>
/// Parses one thread's reply tree from <paramref name="p"/>: the root post's
/// details followed by each one-line reply entry with its nesting depth.
/// </summary>
/// <param name="p">Parser over the thread HTML; cursor 1 is read and advanced.</param>
/// <param name="stopAtFullPost">
/// When true, parsing stops at the first reply entry located after the next
/// <c>&lt;div class="fullpost op</c> marker.
/// </param>
/// <returns>A <see cref="ChattyThread"/> whose <c>Posts</c> holds the parsed posts in order.</returns>
/// <exception cref="MissingThreadException">
/// No <c>&lt;div class="postbody"&gt;</c> marker is found.
/// </exception>
public ChattyThread ParseThreadTree(Parser p, bool stopAtFullPost = true)
{
    if (p.Peek(1, "<div class=\"postbody\">") == -1)
    {
        throw new MissingThreadException($"Thread does not exist.");
    }

    var posts = new List<ChattyPost>();

    // Root post details; these are copied onto the first entry parsed below.
    var rootAuthorSection = p.Clip(_threadTreeAuthorSectionStart, "\"");
    var rootAuthorId = int.Parse(rootAuthorSection.Replace("fpfrozen", ""));
    var rootIsFrozen = rootAuthorSection.Contains("fpfrozen", StringComparison.Ordinal);

    var rootFlairHtml = p.Clip(_threadTreeAuthorFlairStart, "</span>");
    var rootAuthorFlair = ParseUserFlair(rootFlairHtml);

    var rootBodyHtml = p.Clip(_threadTreeRootBodyStart, "</div>");
    var rootBody = MakeSpoilersClickable(HtmlDecodeExceptLtGt(RemoveNewlines(rootBodyHtml)));

    var rootDateHtml = p.Clip(_threadTreeRootDateStart, "T</div");
    var rootDate = DateParser.Parse(StripTags(rootDateHtml).Replace("Flag", "") + "T");

    var currentDepth = 0;

    // Position of the next thread's opening post, or the end of input if none.
    var threadBoundary = p.Peek(1, "<div class=\"fullpost op");
    if (threadBoundary == -1)
    {
        threadBoundary = p.Length;
    }

    while (true)
    {
        var replyPosition = p.Peek(1, "<div class=\"oneline");
        if (replyPosition == -1)
        {
            break;
        }

        if (stopAtFullPost && replyPosition > threadBoundary)
        {
            break;
        }

        var post = new ChattyPost { Depth = currentDepth };

        // The first entry in the tree is the root post itself.
        var isRootPost = posts.Count == 0;
        if (isRootPost)
        {
            post.Body = rootBody;
            post.Date = rootDate;
            post.AuthorId = rootAuthorId;
            post.AuthorFlair = rootAuthorFlair;
            post.IsFrozen = rootIsFrozen;
        }

        var categoryText = p.Clip(_threadTreeReplyCategoryStart, " ");
        post.Category = V2ModerationFlagConverter.Parse(categoryText);

        var idText = p.Clip(_threadTreeReplyIdStart, "\"");
        post.Id = int.Parse(idText);

        var authorHtml = p.Clip(_threadTreeReplyAuthorStart, "</span>");
        post.Author = HtmlDecodeExceptLtGt(authorHtml);

        // Determine the depth of the next reply by walking the list markup
        // between this entry and the next <li>.
        while (true)
        {
            var nextItem = p.Peek(1, "<li ");
            var nextListOpen = p.Peek(1, "<ul>");
            var nextListClose = p.Peek(1, "</ul>");

            // Markers that are not found are treated as sitting at the boundary.
            if (nextItem == -1)
            {
                nextItem = threadBoundary;
            }

            if (nextListOpen == -1)
            {
                nextListOpen = threadBoundary;
            }

            if (nextListClose == -1)
            {
                nextListClose = threadBoundary;
            }

            var nearest = Math.Min(Math.Min(nextItem, nextListOpen), nextListClose);

            // Either this thread has no more replies, or the next reply is on
            // the same depth level; in both cases the depth is settled.
            if (nearest == threadBoundary || nearest == nextItem)
            {
                break;
            }

            if (nearest == nextListOpen)
            {
                // Next reply is underneath this one.
                currentDepth++;
            }
            else if (nearest == nextListClose)
            {
                // Next reply is above this one.
                currentDepth--;
            }

            // Step past the marker just handled so the next Peek finds the following one.
            p.Cursors[1] = nearest + 1;
        }

        posts.Add(post);
    }

    return new ChattyThread { Posts = posts };
}