/// <summary>
        /// Downloads a thread page and extracts its posts using the configured regex patterns.
        /// </summary>
        /// <param name="fti">Thread descriptor; its <c>Board</c> and <c>Id</c> are formatted into the page URL.</param>
        /// <returns>One <see cref="FourChanPostData"/> per post match, in the order the matches occur in the page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fti"/> is null.</exception>
        /// <exception cref="Exception">
        /// The page produced no post match at all, or a post matched a thumbnail URL but no full-size picture URL.
        /// </exception>
        public async Task<FourChanPostData[]> GetThreadData(FourChanThreadData fti)
        {
            // Fail with a descriptive argument error instead of a NullReferenceException below.
            if (fti == null)
            {
                throw new ArgumentNullException("fti");
            }

            string pageUrl = String.Format(baseUrl, fti.Board) + String.Format(threadPostfix, fti.Id);
            byte[] bs = await httpClient.GetByteArrayAsync(pageUrl);
            string reply = GetStringFromBytes(bs);

            Regex r = new Regex(singlePostPattern);
            Regex smallPicUrlRegex = new Regex(smallPicUrlRegexPattern);
            Regex fullPicUrlRegex = new Regex(fullPicUrlRegexPattern);
            Regex postTextRegex = new Regex(postTextRegexPattern);

            Match m = r.Match(reply);

            // Zero posts is treated as a scraping failure rather than an empty thread.
            if (!m.Success)
            {
                throw new Exception("No matches found for the thread page. Possibly format changed?");
            }

            List<FourChanPostData> posts = new List<FourChanPostData>();

            while (m.Success)
            {
                FourChanPostData fcp = new FourChanPostData();

                // The id comes from scraped markup, not user input: parse it culture-independently.
                fcp.Id = long.Parse(m.Groups["id"].Value, System.Globalization.CultureInfo.InvariantCulture);

                string contents = m.Groups["contents"].Value;

                // Small pic.
                Match thumbPicUrlMatch = smallPicUrlRegex.Match(contents);
                if (thumbPicUrlMatch.Success)
                {
                    fcp.ThumbnailUrl = thumbPicUrlMatch.Groups["surl"].Value;

                    // Scheme-relative URL ("//host/path"): prepend an explicit scheme.
                    // Ordinal comparison, because this is a literal prefix check, not linguistic text.
                    if (fcp.ThumbnailUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.ThumbnailUrl = "http:" + fcp.ThumbnailUrl;
                    }
                }

                // Full pic. Only looked up when a thumbnail was found; a thumbnail without
                // a full-size picture is reported as an error.
                if (!String.IsNullOrEmpty(fcp.ThumbnailUrl))
                {
                    Match fullPicUrlMatch = fullPicUrlRegex.Match(contents);
                    if (!fullPicUrlMatch.Success)
                    {
                        throw new Exception("Can't match full pic, but matched thumbnail. Format changed? Id=" + fcp.Id + " in " + pageUrl + ".");
                    }

                    fcp.FullPicUrl = fullPicUrlMatch.Groups["furl"].Value;
                    if (fcp.FullPicUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.FullPicUrl = "http:" + fcp.FullPicUrl;
                    }
                }

                // Text. A failed match yields an empty group value, so a post without
                // text is passed to StripTags as an empty string.
                Match textMatch = postTextRegex.Match(contents);
                string parsedText = textMatch.Groups["text"].Value;
                fcp.Text = TextUtil.StripTags(parsedText);
                posts.Add(fcp);

                m = m.NextMatch();
            }

            return posts.ToArray();
        }
示例#2
0
        /// <summary>
        /// Fetches the page of the given thread and parses every post found in it.
        /// </summary>
        /// <param name="fti">Identifies the thread; <c>Board</c> and <c>Id</c> are used to build the page URL.</param>
        /// <returns>The parsed posts as an array, ordered as their matches appear in the page.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fti"/> is null.</exception>
        /// <exception cref="Exception">
        /// Thrown when the page yields no post match, or when a post has a thumbnail URL
        /// but no full-size picture URL can be matched.
        /// </exception>
        public async Task <FourChanPostData[]> GetThreadData(FourChanThreadData fti)
        {
            // Report a null argument explicitly rather than via NullReferenceException.
            if (fti == null)
            {
                throw new ArgumentNullException("fti");
            }

            string pageUrl = String.Format(baseUrl, fti.Board) + String.Format(threadPostfix, fti.Id);

            byte[] bs = await httpClient.GetByteArrayAsync(pageUrl);

            string reply = GetStringFromBytes(bs);

            Regex r = new Regex(singlePostPattern);
            Regex smallPicUrlRegex = new Regex(smallPicUrlRegexPattern);
            Regex fullPicUrlRegex  = new Regex(fullPicUrlRegexPattern);
            Regex postTextRegex    = new Regex(postTextRegexPattern);

            Match m = r.Match(reply);

            // A page without a single post match is considered a parsing failure.
            if (!m.Success)
            {
                throw new Exception("No matches found for the thread page. Possibly format changed?");
            }

            List <FourChanPostData> posts = new List <FourChanPostData>();

            while (m.Success)
            {
                FourChanPostData fcp = new FourChanPostData();

                // Scraped numeric id: parse with the invariant culture so the result
                // does not depend on the machine's regional settings.
                fcp.Id = long.Parse(m.Groups["id"].Value, System.Globalization.CultureInfo.InvariantCulture);

                string contents = m.Groups["contents"].Value;

                // Small pic.
                Match thumbPicUrlMatch = smallPicUrlRegex.Match(contents);
                if (thumbPicUrlMatch.Success)
                {
                    string thumbnailUrl = thumbPicUrlMatch.Groups["surl"].Value;
                    fcp.ThumbnailUrl = thumbnailUrl;

                    // "//host/path" carries no scheme; add one. Ordinal comparison is used
                    // because the prefix is a literal token, not culture-dependent text.
                    if (fcp.ThumbnailUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.ThumbnailUrl = "http:" + fcp.ThumbnailUrl;
                    }
                }

                // Full pic. Required whenever a thumbnail URL was extracted.
                if (!String.IsNullOrEmpty(fcp.ThumbnailUrl))
                {
                    Match fullPicUrlMatch = fullPicUrlRegex.Match(contents);
                    if (!fullPicUrlMatch.Success)
                    {
                        throw new Exception("Can't match full pic, but matched thumbnail. Format changed? Id=" + fcp.Id + " in " + pageUrl + ".");
                    }

                    fcp.FullPicUrl = fullPicUrlMatch.Groups["furl"].Value;
                    if (fcp.FullPicUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.FullPicUrl = "http:" + fcp.FullPicUrl;
                    }
                }

                // Text. When the text pattern does not match, the group value is empty,
                // so StripTags receives an empty string.
                Match  textMatch  = postTextRegex.Match(contents);
                string parsedText = textMatch.Groups["text"].Value;
                fcp.Text = TextUtil.StripTags(parsedText);
                posts.Add(fcp);

                m = m.NextMatch();
            }

            FourChanPostData[] ret = posts.ToArray();
            return ret;
        }