/// <summary>
/// Downloads a thread page and parses every post found on it.
/// </summary>
/// <remarks>
/// The page URL is built from <c>baseUrl</c> (formatted with the board) followed by
/// <c>threadPostfix</c> (formatted with the thread id). The downloaded bytes are decoded
/// with <c>GetStringFromBytes</c> and scanned with <c>singlePostPattern</c>; each match
/// supplies the post id and the raw post contents, from which the thumbnail URL,
/// full-size picture URL and text are extracted.
/// </remarks>
/// <param name="fti">Thread descriptor; its <c>Board</c> and <c>Id</c> are used to build the page URL.</param>
/// <returns>One <see cref="FourChanPostData"/> per matched post, in page order.</returns>
/// <exception cref="FormatException">
/// The page contains no post matches, a post has a thumbnail but no full-size picture link,
/// or a post id cannot be parsed as a number.
/// </exception>
public async Task<FourChanPostData[]> GetThreadData(FourChanThreadData fti)
{
    string pageUrl = String.Format(baseUrl, fti.Board) + String.Format(threadPostfix, fti.Id);
    byte[] bs = await httpClient.GetByteArrayAsync(pageUrl);
    string reply = GetStringFromBytes(bs);

    Regex postRegex = new Regex(singlePostPattern);
    Regex smallPicUrlRegex = new Regex(smallPicUrlRegexPattern);
    Regex fullPicUrlRegex = new Regex(fullPicUrlRegexPattern);
    Regex postTextRegex = new Regex(postTextRegexPattern);

    Match m = postRegex.Match(reply);
    if (!m.Success)
    {
        // A thread page without a single post means the markup no longer fits the pattern.
        throw new FormatException("No matches found for the thread page. Possibly format changed?");
    }

    List<FourChanPostData> posts = new List<FourChanPostData>();
    while (m.Success)
    {
        FourChanPostData fcp = new FourChanPostData();

        // The id is machine-readable data, so parse it independently of the current culture.
        fcp.Id = long.Parse(m.Groups["id"].Value, System.Globalization.CultureInfo.InvariantCulture);
        string contents = m.Groups["contents"].Value;

        // Small pic.
        Match thumbPicUrlMatch = smallPicUrlRegex.Match(contents);
        if (thumbPicUrlMatch.Success)
        {
            fcp.ThumbnailUrl = thumbPicUrlMatch.Groups["surl"].Value;

            // Protocol-relative URL: prepend a scheme. Ordinal comparison, this is not linguistic text.
            if (fcp.ThumbnailUrl.StartsWith("//", StringComparison.Ordinal))
            {
                fcp.ThumbnailUrl = "http:" + fcp.ThumbnailUrl;
            }
        }

        // Full pic. Only looked up when a thumbnail was found; a thumbnail without a
        // full-size link is treated as a parsing failure.
        if (!String.IsNullOrEmpty(fcp.ThumbnailUrl))
        {
            Match fullPicUrlMatch = fullPicUrlRegex.Match(contents);
            if (!fullPicUrlMatch.Success)
            {
                throw new FormatException("Can't match full pic, but matched thumbnail. Format changed? Id=" + fcp.Id + " in " + pageUrl + ".");
            }

            fcp.FullPicUrl = fullPicUrlMatch.Groups["furl"].Value;
            if (fcp.FullPicUrl.StartsWith("//", StringComparison.Ordinal))
            {
                fcp.FullPicUrl = "http:" + fcp.FullPicUrl;
            }
        }

        // Text. An unsuccessful match yields an empty group value, so no success check is needed.
        Match textMatch = postTextRegex.Match(contents);
        fcp.Text = TextUtil.StripTags(textMatch.Groups["text"].Value);

        posts.Add(fcp);
        m = m.NextMatch();
    }

    return posts.ToArray();
}
/// <summary>
/// Fetches the page of the given thread and extracts the data of each post on it.
/// </summary>
/// <remarks>
/// The request URL is <c>baseUrl</c> formatted with <c>fti.Board</c>, concatenated with
/// <c>threadPostfix</c> formatted with <c>fti.Id</c>. The response bytes are converted to
/// text via <c>GetStringFromBytes</c>, and posts are located with <c>singlePostPattern</c>.
/// URLs that start with <c>//</c> are prefixed with <c>http:</c>.
/// </remarks>
/// <param name="fti">Identifies the thread (board and thread id) to download.</param>
/// <returns>An array with one entry per post matched on the page, in match order.</returns>
/// <exception cref="FormatException">
/// Thrown when no post matches on the page, when a post has a thumbnail but its full-size
/// picture link cannot be matched, or when a post id is not a valid number.
/// </exception>
public async Task <FourChanPostData[]> GetThreadData(FourChanThreadData fti)
{
    string pageUrl = String.Format(baseUrl, fti.Board) + String.Format(threadPostfix, fti.Id);
    byte[] pageBytes = await httpClient.GetByteArrayAsync(pageUrl);
    string reply = GetStringFromBytes(pageBytes);

    Regex postRegex = new Regex(singlePostPattern);
    Regex smallPicUrlRegex = new Regex(smallPicUrlRegexPattern);
    Regex fullPicUrlRegex = new Regex(fullPicUrlRegexPattern);
    Regex postTextRegex = new Regex(postTextRegexPattern);

    Match firstPost = postRegex.Match(reply);
    if (!firstPost.Success)
    {
        // Not even one post matched: the page layout probably differs from what the pattern expects.
        throw new FormatException("No matches found for the thread page. Possibly format changed?");
    }

    List <FourChanPostData> posts = new List <FourChanPostData>();
    for (Match post = firstPost; post.Success; post = post.NextMatch())
    {
        FourChanPostData fcp = new FourChanPostData();

        // Invariant culture: the id comes from markup, not from user-facing text.
        fcp.Id = long.Parse(post.Groups["id"].Value, System.Globalization.CultureInfo.InvariantCulture);
        string contents = post.Groups["contents"].Value;

        // Small pic.
        Match thumbPicUrlMatch = smallPicUrlRegex.Match(contents);
        if (thumbPicUrlMatch.Success)
        {
            string thumbnailUrl = thumbPicUrlMatch.Groups["surl"].Value;

            // Ordinal comparison: "//" is a literal URL prefix, not culture-dependent text.
            if (thumbnailUrl.StartsWith("//", StringComparison.Ordinal))
            {
                thumbnailUrl = "http:" + thumbnailUrl;
            }

            fcp.ThumbnailUrl = thumbnailUrl;
        }

        // Full pic. Required whenever a thumbnail was found.
        if (!String.IsNullOrEmpty(fcp.ThumbnailUrl))
        {
            Match fullPicUrlMatch = fullPicUrlRegex.Match(contents);
            if (!fullPicUrlMatch.Success)
            {
                throw new FormatException("Can't match full pic, but matched thumbnail. Format changed? Id=" + fcp.Id + " in " + pageUrl + ".");
            }

            string fullPicUrl = fullPicUrlMatch.Groups["furl"].Value;
            if (fullPicUrl.StartsWith("//", StringComparison.Ordinal))
            {
                fullPicUrl = "http:" + fullPicUrl;
            }

            fcp.FullPicUrl = fullPicUrl;
        }

        // Text. When the pattern does not match, the group value is an empty string.
        string parsedText = postTextRegex.Match(contents).Groups["text"].Value;
        fcp.Text = TextUtil.StripTags(parsedText);

        posts.Add(fcp);
    }

    return posts.ToArray();
}