Esempio n. 1
0
        /// <summary>
        /// Downloads the catalog page of a board, extracts the JSON captured by the
        /// "contents" group of <c>threadsParseRegex</c>, and converts every entry of the
        /// deserialized dictionary into a <see cref="FourChanThreadData"/>.
        /// </summary>
        /// <param name="board">Board name substituted into <c>baseUrl</c> and copied to each result's <c>Board</c>.</param>
        /// <returns>One element per key/value pair of the deserialized thread dictionary.</returns>
        /// <exception cref="InvalidOperationException">
        /// The downloaded page does not match <c>threadsParseRegex</c>, or the captured JSON
        /// deserializes to <c>null</c>.
        /// </exception>
        public async Task<FourChanThreadData[]> GetMainPageData(string board)
        {
            string catalogUrl = String.Format(baseUrl, board) + catalogPostfix;
            byte[] bs = await httpClient.GetByteArrayAsync(catalogUrl);
            string contents = GetStringFromBytes(bs);

            Match m = Regex.Match(contents, threadsParseRegex);
            if (!m.Success)
            {
                // Without this guard the empty capture deserializes to null and the failure
                // surfaces later as an uninformative NullReferenceException.
                throw new InvalidOperationException("No matches found for the catalog page " + catalogUrl + ". Possibly format changed?");
            }

            string json = m.Groups["contents"].Value;

            var deserialized = JsonConvert.DeserializeObject<Dictionary<long, ThreadJsonBackEnd>>(json);
            if (deserialized == null)
            {
                // An empty capture or a literal JSON null deserializes to null.
                throw new InvalidOperationException("Catalog JSON in " + catalogUrl + " deserialized to null. Possibly format changed?");
            }

            List<FourChanThreadData> list = new List<FourChanThreadData>(deserialized.Count);

            foreach (var kvp in deserialized)
            {
                ThreadJsonBackEnd be = kvp.Value;

                // The dictionary key becomes the thread id; Sub and Teaser are HTML-decoded.
                list.Add(new FourChanThreadData()
                {
                    Id      = kvp.Key,
                    Board   = board,
                    Images  = be.i,
                    Replies = be.r,
                    Sub     = WebUtility.HtmlDecode(be.sub),
                    Teaser  = WebUtility.HtmlDecode(be.teaser)
                });
            }

            return list.ToArray();
        }
        /// <summary>
        /// Fetches the catalog page for <paramref name="board"/>, pulls out the JSON captured
        /// by the "contents" group of <c>threadsParseRegex</c>, and maps each deserialized
        /// entry to a <see cref="FourChanThreadData"/>.
        /// </summary>
        /// <param name="board">Board name formatted into <c>baseUrl</c>; also stored in each result's <c>Board</c>.</param>
        /// <returns>An array with one element per entry of the deserialized thread dictionary.</returns>
        /// <exception cref="InvalidOperationException">
        /// The page content does not match <c>threadsParseRegex</c>, or the captured JSON
        /// deserializes to <c>null</c>.
        /// </exception>
        public async Task<FourChanThreadData[]> GetMainPageData(string board)
        {
            string catalogUrl = String.Format(baseUrl, board) + catalogPostfix;
            byte[] bs = await httpClient.GetByteArrayAsync(catalogUrl);
            string contents = GetStringFromBytes(bs);

            Match m = Regex.Match(contents, threadsParseRegex);
            if (!m.Success)
            {
                // A failed match yields an empty capture, which would deserialize to null
                // and crash below with a NullReferenceException; fail with a clear message instead.
                throw new InvalidOperationException("No matches found for the catalog page " + catalogUrl + ". Possibly format changed?");
            }

            string json = m.Groups["contents"].Value;

            var deserialized = JsonConvert.DeserializeObject<Dictionary<long, ThreadJsonBackEnd>>(json);
            if (deserialized == null)
            {
                // Reached when the capture is empty or is the JSON literal null.
                throw new InvalidOperationException("Catalog JSON in " + catalogUrl + " deserialized to null. Possibly format changed?");
            }

            List<FourChanThreadData> list = new List<FourChanThreadData>(deserialized.Count);

            foreach (var kvp in deserialized)
            {
                ThreadJsonBackEnd be = kvp.Value;

                // Key is used as the thread id; subject and teaser text are HTML-decoded.
                FourChanThreadData fcti = new FourChanThreadData()
                {
                    Id = kvp.Key,
                    Board = board,
                    Images = be.i,
                    Replies = be.r,
                    Sub = WebUtility.HtmlDecode(be.sub),
                    Teaser = WebUtility.HtmlDecode(be.teaser)
                };
                list.Add(fcti);
            }

            return list.ToArray();
        }
        /// <summary>
        /// Downloads a thread page and parses every post matched by <c>singlePostPattern</c>
        /// into a <see cref="FourChanPostData"/> (id, optional thumbnail/full picture URLs, text).
        /// </summary>
        /// <param name="fti">Thread descriptor; its <c>Board</c> and <c>Id</c> are used to build the page URL.</param>
        /// <returns>The parsed posts, in the order the pattern matched them on the page.</returns>
        /// <exception cref="Exception">
        /// No post matched on the page, or a post has a thumbnail URL but no full picture URL.
        /// </exception>
        public async Task<FourChanPostData[]> GetThreadData(FourChanThreadData fti)
        {
            string pageUrl = String.Format(baseUrl, fti.Board) + String.Format(threadPostfix, fti.Id);
            byte[] bs = await httpClient.GetByteArrayAsync(pageUrl);
            string reply = GetStringFromBytes(bs);

            Regex r = new Regex(singlePostPattern);
            Regex smallPicUrlRegex = new Regex(smallPicUrlRegexPattern);
            Regex fullPicUrlRegex = new Regex(fullPicUrlRegexPattern);
            Regex postTextRegex = new Regex(postTextRegexPattern);

            Match m = r.Match(reply);

            if (!m.Success)
            {
                throw new Exception("No matches found for the thread page. Possibly format changed?");
            }

            List<FourChanPostData> posts = new List<FourChanPostData>();

            while (m.Success)
            {
                FourChanPostData fcp = new FourChanPostData();
                fcp.Id = long.Parse(m.Groups["id"].Value);

                string contents = m.Groups["contents"].Value;

                // Small pic.
                Match thumbPicUrlMatch = smallPicUrlRegex.Match(contents);
                if (thumbPicUrlMatch.Success)
                {
                    fcp.ThumbnailUrl = thumbPicUrlMatch.Groups["surl"].Value;

                    // Protocol-relative URL: prepend a scheme. Ordinal comparison because
                    // this is machine-readable text, not linguistic content.
                    if (fcp.ThumbnailUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.ThumbnailUrl = "http:" + fcp.ThumbnailUrl;
                    }
                }

                // Full pic. Only looked up when a thumbnail was found; a thumbnail without
                // a full picture is treated as a page format change.
                if (!String.IsNullOrEmpty(fcp.ThumbnailUrl))
                {
                    Match fullPicUrlMatch = fullPicUrlRegex.Match(contents);
                    if (!fullPicUrlMatch.Success)
                    {
                        throw new Exception("Can't match full pic, but matched thumbnail. Format changed? Id=" + fcp.Id + " in " + pageUrl + ".");
                    }

                    fcp.FullPicUrl = fullPicUrlMatch.Groups["furl"].Value;
                    if (fcp.FullPicUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.FullPicUrl = "http:" + fcp.FullPicUrl;
                    }
                }

                // Text. A failed match yields an empty "text" group value, which is still
                // passed through TextUtil.StripTags.
                Match textMatch = postTextRegex.Match(contents);
                string parsedText = textMatch.Groups["text"].Value;
                fcp.Text = TextUtil.StripTags(parsedText);
                posts.Add(fcp);

                m = m.NextMatch();
            }

            return posts.ToArray();
        }
Esempio n. 4
0
        /// <summary>
        /// Fetches the page of the given thread and turns each post matched by
        /// <c>singlePostPattern</c> into a <see cref="FourChanPostData"/> carrying its id,
        /// optional thumbnail and full picture URLs, and tag-stripped text.
        /// </summary>
        /// <param name="fti">Thread descriptor; <c>Board</c> and <c>Id</c> are formatted into the page URL.</param>
        /// <returns>The parsed posts, in match order.</returns>
        /// <exception cref="Exception">
        /// The page contains no post match, or a post has a thumbnail URL without a full picture URL.
        /// </exception>
        public async Task<FourChanPostData[]> GetThreadData(FourChanThreadData fti)
        {
            string pageUrl = String.Format(baseUrl, fti.Board) + String.Format(threadPostfix, fti.Id);

            byte[] bs = await httpClient.GetByteArrayAsync(pageUrl);

            string reply = GetStringFromBytes(bs);

            Regex r = new Regex(singlePostPattern);
            Regex smallPicUrlRegex = new Regex(smallPicUrlRegexPattern);
            Regex fullPicUrlRegex  = new Regex(fullPicUrlRegexPattern);
            Regex postTextRegex    = new Regex(postTextRegexPattern);

            Match m = r.Match(reply);

            if (!m.Success)
            {
                throw new Exception("No matches found for the thread page. Possibly format changed?");
            }

            List<FourChanPostData> posts = new List<FourChanPostData>();

            while (m.Success)
            {
                FourChanPostData fcp = new FourChanPostData();
                fcp.Id = long.Parse(m.Groups["id"].Value);

                string contents = m.Groups["contents"].Value;

                // Small pic.
                Match thumbPicUrlMatch = smallPicUrlRegex.Match(contents);
                if (thumbPicUrlMatch.Success)
                {
                    fcp.ThumbnailUrl = thumbPicUrlMatch.Groups["surl"].Value;

                    // URLs starting with "//" get an explicit scheme. The comparison is
                    // ordinal so the check does not depend on the current culture.
                    if (fcp.ThumbnailUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.ThumbnailUrl = "http:" + fcp.ThumbnailUrl;
                    }
                }

                // Full pic. Required whenever a thumbnail was found; its absence is
                // reported as a probable format change.
                if (!String.IsNullOrEmpty(fcp.ThumbnailUrl))
                {
                    Match fullPicUrlMatch = fullPicUrlRegex.Match(contents);
                    if (!fullPicUrlMatch.Success)
                    {
                        throw new Exception("Can't match full pic, but matched thumbnail. Format changed? Id=" + fcp.Id + " in " + pageUrl + ".");
                    }

                    fcp.FullPicUrl = fullPicUrlMatch.Groups["furl"].Value;
                    if (fcp.FullPicUrl.StartsWith("//", StringComparison.Ordinal))
                    {
                        fcp.FullPicUrl = "http:" + fcp.FullPicUrl;
                    }
                }

                // Text. When the text pattern does not match, the group value is empty
                // and is still handed to TextUtil.StripTags.
                Match  textMatch  = postTextRegex.Match(contents);
                string parsedText = textMatch.Groups["text"].Value;
                fcp.Text = TextUtil.StripTags(parsedText);
                posts.Add(fcp);

                m = m.NextMatch();
            }

            return posts.ToArray();
        }