コード例 #1
0
        /// <summary>
        /// Inserts a scraped post by executing the <c>Posts_Insert</c> stored procedure on the
        /// <c>DefaultConnection</c> connection string and returns the value the procedure
        /// writes to its <c>@Id</c> output parameter.
        /// </summary>
        /// <param name="model">Post whose <c>Title</c> and <c>URL</c> are passed as <c>@Title</c> and <c>@Url</c>.</param>
        /// <returns>The integer read back from the <c>@Id</c> output parameter.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is null.</exception>
        public int SavePost(ScrapedPost model)
        {
            if (model == null)
            {
                throw new System.ArgumentNullException(nameof(model));
            }

            string sqlConnectionString = ConfigurationManager.ConnectionStrings["DefaultConnection"].ConnectionString;

            using (SqlConnection conn = new SqlConnection(sqlConnectionString))
            using (SqlCommand cmd = new SqlCommand("Posts_Insert", conn))
            {
                cmd.CommandType = CommandType.StoredProcedure;

                // A parameter whose value is a CLR null is not sent to the server, which makes the
                // procedure fail with "parameter was not supplied"; DBNull.Value sends a real SQL NULL.
                cmd.Parameters.AddWithValue("@Title", (object)model.Title ?? System.DBNull.Value);
                cmd.Parameters.AddWithValue("@Url", (object)model.URL ?? System.DBNull.Value);

                SqlParameter idParam = new SqlParameter("@Id", SqlDbType.Int)
                {
                    Direction = ParameterDirection.Output
                };
                cmd.Parameters.Add(idParam);

                conn.Open();
                cmd.ExecuteNonQuery();

                // Leaving the using block disposes (and therefore closes) the connection,
                // so no explicit Close() call is needed.
                return (int)idParam.Value;
            }
        }
コード例 #2
0
        /// <summary>
        /// Loads the r/Games "top of the day" search page and returns one
        /// <see cref="ScrapedPost"/> for every anchor whose <c>class</c> attribute
        /// contains <c>search-title</c>.
        /// </summary>
        /// <returns>
        /// Posts with <c>Title</c> set to the anchor's inner text and <c>URL</c> set to its
        /// <c>href</c> attribute (null when the anchor has no <c>href</c>).
        /// </returns>
        public List<ScrapedPost> GetAll()
        {
            // Add TLS 1.2 to the protocols already enabled for the process. A plain assignment
            // would replace the whole set and switch off every other protocol for all callers.
            ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

            string url = "https://www.reddit.com/r/Games/search?q=&sort=top&restrict_sr=on&t=day";

            HtmlDocument document = new HtmlWeb().Load(url);

            return document.DocumentNode.Descendants("a")
                   .Where(a => a.Attributes.Contains("class") && a.Attributes["class"].Value.Contains("search-title"))
                   .Select(a => new ScrapedPost
                   {
                       Title = a.InnerText,
                       URL   = a.GetAttributeValue("href", null)
                   })
                   .ToList();
        }
コード例 #3
0
        /// <summary>
        /// Requests the comments of <paramref name="post"/> from the graph client (page size 100),
        /// stamps every returned comment with the scrape time and its parent post, saves it,
        /// and returns the saved comments.
        /// </summary>
        /// <param name="post">Post whose comments are fetched; asserted non-null in debug builds.</param>
        /// <returns>The results of saving each comment; empty when the client returns no response.</returns>
        public IEnumerable<ScrapedComment> Scrape(ScrapedPost post)
        {
            Debug.Assert(post != null);

            var savedComments = new List<ScrapedComment>();

            // One timestamp for the whole run so every comment of this pass shares it.
            DateTime scrapeTime = DateTime.Now;

            var request = new CommentsRequest(post.Id) { PaginationLimit = 100 };
            PagedResponse<ScrapedComment> response = GraphClient.GetComments<ScrapedComment>(request);

            // The response could be null if the post doesn't exist anymore.
            if (response == null)
            {
                return savedComments;
            }

            foreach (ScrapedComment scrapedComment in response.AllData())
            {
                // Only a comment that has never been stamped gets a first-scraped time.
                bool neverScraped = scrapedComment.FirstScraped == DateTime.MinValue;
                if (neverScraped)
                {
                    scrapedComment.FirstScraped = scrapeTime;
                }

                scrapedComment.LastScraped = scrapeTime;
                scrapedComment.Post = post;

                ScrapedComment saved = Save(scrapedComment, Refresh.False);
                savedComments.Add(saved);
            }

            return savedComments;
        }
コード例 #4
0
        /// <summary>
        /// Flattens a <see cref="ScrapedPost"/> and its nested page, poster, place and count
        /// objects into a single-level <see cref="ExpandoObject"/> suitable for export.
        /// </summary>
        /// <param name="post">
        /// Post to flatten. <c>post.Page</c> is dereferenced unconditionally and must be set;
        /// poster, place, reactions, comments and shares may be missing.
        /// </param>
        /// <returns>
        /// A dynamic object with one member per exported column. Topics are joined with commas
        /// (empty string when there are none); missing reaction, comment and share counts are
        /// exported as 0; missing poster and place values are exported as null.
        /// </returns>
        public static dynamic MapPost(ScrapedPost post)
        {
            // Flatten out the post and export it.
            dynamic flattened = new ExpandoObject();

            flattened.Id      = post.Id;
            flattened.Message = post.Message;
            flattened.Topics  = post.Topics == null ? "" : string.Join(",", post.Topics);

            flattened.Link        = post.Link;
            flattened.Caption     = post.Caption;
            flattened.Description = post.Description;
            flattened.Permalink   = post.Permalink;

            flattened.CreatedTime = post.CreatedTime;
            flattened.UpdatedTime = post.UpdatedTime;

            flattened.Name       = post.Name;
            flattened.StatusType = post.StatusType;
            flattened.Type       = post.Type;

            // Any of the count containers can be absent on a post; treat a missing container
            // the same way a missing reactions summary is treated and export a count of 0
            // instead of throwing a NullReferenceException.
            flattened.Reactions = post.Reactions?.Summary?.TotalCount ?? 0;
            flattened.Comments  = post.Comments?.Summary?.TotalCount ?? 0;
            flattened.Shares    = post.Shares?.Count ?? 0;

            flattened.InternalPageId = post.Page.Id;
            flattened.PageId         = post.Page.FacebookId;
            flattened.PageName       = post.Page.Name;
            flattened.PageCategory   = post.Page.Category;
            flattened.PageLikes      = post.Page.FanCount;
            flattened.PageLikesDate  = post.Page.Date;

            flattened.PosterId   = post.Poster?.Id;
            flattened.PosterName = post.Poster?.Name;

            flattened.GeoPoint = post.GeoPoint;

            flattened.PlaceId                = post.Place?.Id;
            flattened.PlaceName              = post.Place?.Name;
            flattened.PlaceLocationCity      = post.Place?.Location?.City;
            flattened.PlaceLocationRegion    = post.Place?.Location?.Region;
            flattened.PlaceLocationCountry   = post.Place?.Location?.Country;
            flattened.PlaceLocationLatitude  = post.Place?.Location?.Latitude;
            flattened.PlaceLocationLongitude = post.Place?.Location?.Longitude;

            flattened.Scraped     = post.Scraped;
            flattened.LastScraped = post.LastScraped;

            return flattened;
        }
コード例 #5
0
        /// <summary>
        /// Saves the posted <see cref="ScrapedPost"/> through <see cref="WebScraperService"/> and
        /// reports the outcome as an HTTP response.
        /// </summary>
        /// <param name="model">Post to save, bound from the request.</param>
        /// <returns>
        /// 200 OK carrying the value returned by <c>WebScraperService.SavePost</c>, or
        /// 400 Bad Request with an error message when the model is missing or saving throws.
        /// </returns>
        public HttpResponseMessage SavePost(ScrapedPost model)
        {
            // Reject a missing body up front with a meaningful message instead of letting the
            // service fail on it and reporting the resulting exception text to the client.
            if (model == null)
            {
                return Request.CreateErrorResponse(HttpStatusCode.BadRequest, "A post is required.");
            }

            WebScraperService svc = new WebScraperService();

            try
            {
                int savedId = svc.SavePost(model);
                return Request.CreateResponse(HttpStatusCode.OK, savedId);
            }
            catch (Exception ex)
            {
                // Every failure is reported as 400 with the exception message; this status
                // is kept unchanged so existing clients see the same contract.
                return Request.CreateErrorResponse(HttpStatusCode.BadRequest, ex.Message);
            }
        }
コード例 #6
0
        /// <summary>
        /// Imports posts from the given CSV sources. Each record's <c>Media Title</c> column is used
        /// as the post id: the post is scraped, enriched with the topics and page named in the
        /// record, and saved. When scraping returns null, a placeholder post with that id is
        /// created and its counts, creation date and message are taken from the record instead.
        /// </summary>
        /// <param name="postCSVs">CSV sources handed to <c>Read</c>, which invokes the callback per record.</param>
        /// <returns>The results of saving each imported post, in the order the records were read.</returns>
        public IEnumerable<ScrapedPost> ImportPosts(IEnumerable<string> postCSVs)
        {
            var posts = new List<ScrapedPost>();
            int numberSaved = 0;

            Read(postCSVs, record =>
            {
                string postId = (string)record["Media Title"];

                // NOTE(review): the result of this lookup is unused because the "skip posts that
                // already exist" logic is disabled. The call is kept only in case Get has side
                // effects; confirm and remove.
                PostScraper.Get(postId);

                ScrapedPost post = PostScraper.ScrapePost(postId);

                // When the post can no longer be scraped, its data has to come from the CSV record.
                bool importFromRecord = post == null;
                if (importFromRecord)
                {
                    // Post has been deleted - we still want to save it.
                    post = new ScrapedPost { Id = postId };
                    Console.WriteLine($"Post {postId} does not exist.");
                }

                string normalizedPageName = null;
                foreach (string field in record.Keys)
                {
                    string trimmedField = field.Trim();
                    string value = (string)record[field];

                    if (importFromRecord)
                    {
                        switch (trimmedField)
                        {
                            case "#_Post_Likes":
                                post.Reactions = new Reactions
                                {
                                    Summary = new ReactionsSummary { TotalCount = ParseCountOrUnknown(value, "likes") }
                                };
                                break;

                            case "#_Post_Comments":
                                post.Comments = new Comments
                                {
                                    Summary = new CommentsSummary { TotalCount = ParseCountOrUnknown(value, "comments") }
                                };
                                break;

                            case "#_Post_Shares":
                                post.Shares = new Shares { Count = ParseCountOrUnknown(value, "shares") };
                                break;

                            case "Post_Date":
                            case "Excerpt Date":
                                // The file format is fixed, so parse with the invariant culture; with the
                                // current culture "/" stands for that culture's own date separator.
                                post.CreatedTime = DateTime.ParseExact(value, "M/d/yyyy", CultureInfo.InvariantCulture);
                                break;

                            case "Excerpt Copy":
                                post.Message = value;
                                break;
                        }
                    }

                    // Turn the comma separated list of topics into a sequence.
                    if (trimmedField == "Codes Applied Combined")
                    {
                        post.Topics = value.Split(',').Select(topic => topic.Trim());
                    }

                    // Get the page from the post.
                    if (trimmedField == "Page Name")
                    {
                        normalizedPageName = Mappings[value.Trim()].Name;
                    }
                }

                // Get the nearest data we have for page likes at the time the post was created.
                Debug.Assert(normalizedPageName != null);
                PostScraper.UpdateMetadata(post, normalizedPageName);

                // Print the progress to make sure we know something is happening.
                numberSaved++;
                Console.WriteLine(numberSaved);

                // Save the post.
                posts.Add(PostScraper.Save(post, Refresh.False));
            });

            return posts;
        }

        /// <summary>
        /// Parses a whole-number count that may be written with a decimal point (e.g. "12.0").
        /// Returns -1 and logs a message when the value is empty or otherwise not parseable.
        /// </summary>
        /// <param name="value">Raw column text.</param>
        /// <param name="description">Plural noun used in the log message, e.g. "likes".</param>
        private static int ParseCountOrUnknown(string value, string description)
        {
            // Invariant culture: the data uses "." as the decimal separator regardless of the
            // culture of the machine running the import.
            if (int.TryParse(value, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out int count))
            {
                return count;
            }

            Console.WriteLine($"Cannot parse number of {description}. Skipping...");
            return -1;
        }