/// <summary>
/// Fetch latest posts.
/// Downloads the reddit listing at FUNNY_URL (limited to POST_FETCH_LIMIT entries)
/// and converts every entry with createFunnyPost, running at most MAX_TASKS
/// conversions at the same time.
/// </summary>
/// <returns>
/// List of converted posts (entries whose conversion yields null are left out),
/// or null when downloading the listing fails with a WebException.
/// </returns>
internal async Task<List<Post>> fetchPosts()
{
    // fetch posts (amount of limit set)
    HashSet<RedditPost> rpList;
    try
    {
        RedditPostPage rpp = await RedditScraper.ParsePosts(FUNNY_URL + "&limit=" + POST_FETCH_LIMIT);
        rpList = rpp.Posts;
    }
    catch (WebException we)
    {
        Debug.WriteLine(we.Message);
        return null;
    }

    // convert reddit posts into funnyposts
    ConcurrentStack<Post> fpStack = new ConcurrentStack<Post>();

    // DefaultConnectionLimit is a static (process-wide) setting: override it only
    // for the duration of the conversion and always put the old value back, even
    // when a conversion throws.
    int dclBackup = ServicePointManager.DefaultConnectionLimit;
    ServicePointManager.DefaultConnectionLimit = MAX_TASKS;
    try
    {
        Parallel.ForEach(
            rpList,
            new ParallelOptions { MaxDegreeOfParallelism = MAX_TASKS },
            post =>
            {
                // .Result blocks this worker until the conversion task has finished;
                // a faulted conversion surfaces from Parallel.ForEach to the caller.
                Post fp = createFunnyPost(post).Result;
                if (fp != null)
                {
                    fpStack.Push(fp);
                }
            });
    }
    finally
    {
        ServicePointManager.DefaultConnectionLimit = dclBackup;
    }

    return new List<Post>(fpStack);
}
/// <summary>
/// Parse reddit posts from given url. i.e. http://www.reddit.com/r/worldnews.json
/// </summary>
/// <param name="url">Address of the reddit JSON listing to download.</param>
/// <returns>
/// Page holding the before/after paging tokens and the parsed posts. When the
/// response status is not OK the freshly constructed page is returned unchanged.
/// </returns>
/// <exception cref="WebException">
/// The request failed, or the response body is not a valid listing (malformed JSON,
/// missing "data"/"children" structure, or a post that cannot be deserialized).
/// Callers that block on the task (.Result / .Wait()) receive it wrapped in an
/// AggregateException; callers that await the task receive it directly.
/// </exception>
public static async Task<RedditPostPage> ParsePosts(string url)
{
    RedditPostPage rpp = new RedditPostPage();

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "GET";
    req.Accept = "application/json";
    // NOTE(review): Timeout is documented for the synchronous GetResponse call;
    // confirm it is honored by GetResponseAsync on the target framework.
    req.Timeout = 3000;

    // get response and handle result
    using (HttpWebResponse res = (HttpWebResponse)await req.GetResponseAsync())
    {
        // might be unnecessary check
        if (res.StatusCode != HttpStatusCode.OK)
        {
            return rpp;
        }

        // stream -> json readers
        using (StreamReader sr = new StreamReader(res.GetResponseStream()))
        using (JsonTextReader jtr = new JsonTextReader(sr))
        {
            string invalidDataMessage = "ParsePosts, invalid data in response from " + url;
            try
            {
                // load json
                JObject all = JObject.Load(jtr);

                // Validate the expected shape explicitly so that a malformed body is
                // reported as WebException instead of a NullReferenceException.
                JObject data = all["data"] as JObject;
                JArray posts = data != null ? data["children"] as JArray : null;
                if (posts == null)
                {
                    throw new WebException(invalidDataMessage);
                }

                JToken before = data["before"];
                JToken after = data["after"];
                rpp.Before = before != null ? before.ToObject<string>() : null; // token to previous page
                rpp.After = after != null ? after.ToObject<string>() : null;    // token to next page

                // parse posts
                foreach (JToken post in posts)
                {
                    JObject child = post as JObject;
                    JToken postData = child != null ? child["data"] : null;
                    if (postData == null)
                    {
                        throw new WebException(invalidDataMessage);
                    }

                    rpp.Posts.Add(postData.ToObject<RedditPost>());
                }
            }
            catch (JsonException je)
            {
                // JsonException is the common base of the reader and serializer
                // errors, so both malformed JSON and undeserializable posts end up
                // here; the original error is kept as InnerException.
                Debug.WriteLine(je.Message);
                throw new WebException(invalidDataMessage, je);
            }
        }
    }

    return rpp;
}
// Requests a listing from an address that is not a reddit endpoint and blocks on
// the result. The method asserts nothing itself.
// NOTE(review): presumably the expected exception is declared by a test attribute
// that is not visible here - confirm.
public void ParsePostsInvalidUri()
{
    var pending = RedditScraper.ParsePosts("http://127.0.0.1.json");

    // Reading Result waits for the task to finish and rethrows any failure.
    RedditPostPage page = pending.Result;
}
// Downloads the r/csharp listing and checks that at least one post was parsed.
public void ParsePosts()
{
    var pending = RedditScraper.ParsePosts("http://www.reddit.com/r/csharp.json");

    // Reading Result waits for the task to finish and rethrows any failure.
    RedditPostPage page = pending.Result;
    int postCount = page.Posts.Count;

    Assert.IsTrue(postCount != 0);
}