/// <summary>
/// Inserts a scraped post by executing the <c>Posts_Insert</c> stored procedure on the
/// "DefaultConnection" connection string and returns the procedure's <c>@Id</c> output value.
/// </summary>
/// <param name="model">Post whose <c>Title</c> and <c>URL</c> are sent as <c>@Title</c> and <c>@Url</c>.</param>
/// <returns>The integer written by the procedure to its <c>@Id</c> output parameter.</returns>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
/// <exception cref="InvalidOperationException">The procedure did not assign <c>@Id</c>.</exception>
public int SavePost(ScrapedPost model)
{
    // Fail fast, before any connection is opened, rather than with a
    // NullReferenceException halfway through building the command.
    if (model == null)
    {
        throw new ArgumentNullException(nameof(model));
    }

    string sqlConnectionString = ConfigurationManager.ConnectionStrings["DefaultConnection"].ConnectionString;

    using (SqlConnection conn = new SqlConnection(sqlConnectionString))
    {
        conn.Open();

        using (SqlCommand cmd = new SqlCommand("Posts_Insert", conn))
        {
            cmd.CommandType = CommandType.StoredProcedure;

            // NOTE(review): a null Title/URL is handed to AddWithValue as null, which ADO.NET
            // treats as "use the parameter's default" rather than SQL NULL. Convert to
            // DBNull.Value here if the procedure is meant to receive NULL - confirm intent.
            cmd.Parameters.AddWithValue("@Title", model.Title);
            cmd.Parameters.AddWithValue("@Url", model.URL);

            SqlParameter idParam = new SqlParameter("@Id", SqlDbType.Int)
            {
                Direction = ParameterDirection.Output
            };
            cmd.Parameters.Add(idParam);

            cmd.ExecuteNonQuery();

            // Guard the unboxing cast: an output parameter the procedure never assigned
            // would otherwise surface as an opaque InvalidCastException.
            object rawId = idParam.Value;
            if (rawId == null || rawId == DBNull.Value)
            {
                throw new InvalidOperationException("Posts_Insert did not return a value for @Id.");
            }

            // No explicit conn.Close(): disposing the connection via 'using' closes it.
            return (int)rawId;
        }
    }
}
/// <summary>
/// Loads the r/Games "top of the day" search page from Reddit and builds one
/// <see cref="ScrapedPost"/> for every anchor element whose <c>class</c> attribute
/// contains "search-title".
/// </summary>
/// <returns>
/// The posts found on the page; <c>Title</c> is the anchor's inner text and <c>URL</c>
/// is its <c>href</c> attribute (null when the attribute is missing).
/// </returns>
public List <ScrapedPost> GetAll()
{
    const string searchUrl = "https://www.reddit.com/r/Games/search?q=&sort=top&restrict_sr=on&t=day";

    // Set on the static ServicePointManager before the page is requested.
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;

    HtmlDocument page = new HtmlWeb().Load(searchUrl);

    return page.DocumentNode
        .Descendants("a")
        .Where(anchor => anchor.Attributes.Contains("class")
                         && anchor.Attributes["class"].Value.Contains("search-title"))
        .Select(anchor => new ScrapedPost
        {
            Title = anchor.InnerText,
            URL = anchor.GetAttributeValue("href", null)
        })
        .ToList();
}
/// <summary>
/// Fetches the comments of <paramref name="post"/> through <c>GraphClient</c>, stamps each
/// one with the current local time, links it to the post and saves it.
/// </summary>
/// <param name="post">The post whose comments are requested; asserted non-null in debug builds.</param>
/// <returns>
/// The values returned by <c>Save</c> for each comment, in response order. Empty when the
/// comments request returned null.
/// </returns>
public IEnumerable <ScrapedComment> Scrape(ScrapedPost post)
{
    Debug.Assert(post != null);

    var savedComments = new List <ScrapedComment>();

    // One timestamp is shared by every comment handled in this call.
    DateTime scrapeTime = DateTime.Now;

    var request = new CommentsRequest(post.Id) { PaginationLimit = 100 };
    PagedResponse <ScrapedComment> response = GraphClient.GetComments <ScrapedComment>(request);

    // Could be null if the post doesn't exist anymore.
    if (response == null)
    {
        return(savedComments);
    }

    foreach (ScrapedComment comment in response.AllData())
    {
        // FirstScraped is only filled in while it still holds DateTime.MinValue.
        bool neverScraped = comment.FirstScraped == DateTime.MinValue;
        if (neverScraped)
        {
            comment.FirstScraped = scrapeTime;
        }

        comment.LastScraped = scrapeTime;
        comment.Post = post;

        var saved = Save(comment, Refresh.False);
        savedComments.Add(saved);
    }

    return(savedComments);
}
/// <summary>
/// Flattens a <see cref="ScrapedPost"/> and its nested page, poster, place and count
/// objects into a single <see cref="ExpandoObject"/> with one member per exported value.
/// </summary>
/// <param name="post">The post to flatten.</param>
/// <returns>
/// A dynamic object holding the flattened values. <c>Topics</c> is a comma-joined string
/// (empty when the post has no topics). <c>Reactions</c>, <c>Comments</c> and <c>Shares</c>
/// fall back to 0 when the corresponding object (or its summary) is null. Poster and place
/// members are null when the post has no poster/place.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="post"/> is null.</exception>
public static dynamic MapPost(ScrapedPost post)
{
    if (post == null)
    {
        throw new ArgumentNullException(nameof(post));
    }

    // Flatten out the post and export it.
    // Assignment order is kept unchanged in case the consumer relies on member order.
    dynamic flattened = new ExpandoObject();
    flattened.Id = post.Id;
    flattened.Message = post.Message;
    flattened.Topics = post.Topics == null ? "" : string.Join(",", post.Topics);
    flattened.Link = post.Link;
    flattened.Caption = post.Caption;
    flattened.Description = post.Description;
    flattened.Permalink = post.Permalink;
    flattened.CreatedTime = post.CreatedTime;
    flattened.UpdatedTime = post.UpdatedTime;
    flattened.Name = post.Name;
    flattened.StatusType = post.StatusType;
    flattened.Type = post.Type;

    // All three counts are null-safe and default to 0. Previously only Reactions.Summary
    // was guarded, so a post without Reactions, Comments or Shares threw a
    // NullReferenceException here.
    flattened.Reactions = post.Reactions?.Summary?.TotalCount ?? 0;
    flattened.Comments = post.Comments?.Summary?.TotalCount ?? 0;
    flattened.Shares = post.Shares?.Count ?? 0;

    // NOTE(review): Page is dereferenced without a null check, as before - presumably every
    // post has a page; confirm against callers.
    flattened.InternalPageId = post.Page.Id;
    flattened.PageId = post.Page.FacebookId;
    flattened.PageName = post.Page.Name;
    flattened.PageCategory = post.Page.Category;
    flattened.PageLikes = post.Page.FanCount;
    flattened.PageLikesDate = post.Page.Date;

    // Poster and place are optional.
    flattened.PosterId = post.Poster?.Id;
    flattened.PosterName = post.Poster?.Name;
    flattened.GeoPoint = post.GeoPoint;
    flattened.PlaceId = post.Place?.Id;
    flattened.PlaceName = post.Place?.Name;
    flattened.PlaceLocationCity = post.Place?.Location?.City;
    flattened.PlaceLocationRegion = post.Place?.Location?.Region;
    flattened.PlaceLocationCountry = post.Place?.Location?.Country;
    flattened.PlaceLocationLatitude = post.Place?.Location?.Latitude;
    flattened.PlaceLocationLongitude = post.Place?.Location?.Longitude;

    flattened.Scraped = post.Scraped;
    flattened.LastScraped = post.LastScraped;

    return(flattened);
}
/// <summary>
/// Saves the supplied post through <see cref="WebScraperService"/> and reports the outcome
/// as an HTTP response.
/// </summary>
/// <param name="model">The post to save.</param>
/// <returns>
/// 200 OK carrying the integer returned by the service on success; 400 Bad Request with an
/// error message when no post was supplied or when the service throws.
/// </returns>
public HttpResponseMessage SavePost(ScrapedPost model)
{
    // Reject a missing post explicitly instead of relying on an exception raised inside
    // the service call. The status code (400) is the same as before.
    if (model == null)
    {
        return(Request.CreateErrorResponse(HttpStatusCode.BadRequest, "No post was supplied."));
    }

    WebScraperService svc = new WebScraperService();

    try
    {
        int savedId = svc.SavePost(model);
        return(Request.CreateResponse(HttpStatusCode.OK, savedId));
    }
    catch (Exception ex)
    {
        // NOTE(review): every failure, including server-side ones, is reported as 400 with
        // the raw exception message. Kept as-is so existing clients see the same status.
        return(Request.CreateErrorResponse(HttpStatusCode.BadRequest, ex.Message));
    }
}
/// <summary>
/// Imports posts described by the given CSV inputs. For each record handed to the callback
/// by <c>Read</c>, the post named in the "Media Title" column is scraped. If scraping
/// returns null, a placeholder post is created and filled from the CSV columns instead.
/// Topics and page metadata are then applied and the post is saved.
/// </summary>
/// <param name="postCSVs">CSV inputs passed straight through to <c>Read</c>.</param>
/// <returns>The values returned by <c>PostScraper.Save</c>, in processing order.</returns>
/// <exception cref="FormatException">
/// A "Post_Date" / "Excerpt Date" value of a placeholder post is not in <c>M/d/yyyy</c> form.
/// </exception>
public IEnumerable <ScrapedPost> ImportPosts(IEnumerable <string> postCSVs)
{
    var posts = new List <ScrapedPost>();
    int numberSaved = 0;

    Read(postCSVs, record =>
    {
        string postId = (string)record["Media Title"];

        // NOTE(review): the result of this lookup is unused - the original "skip posts that
        // already exist" logic was disabled. The call is kept in case it has side effects;
        // confirm and remove.
        PostScraper.Get(postId);

        ScrapedPost post = PostScraper.ScrapePost(postId);
        bool useDatabase = post == null;
        if (useDatabase)
        {
            // Post has been deleted - we still want to save it.
            post = new ScrapedPost { Id = postId };
            Console.WriteLine($"Post {postId} does not exist.");
        }

        string normalizedPageName = null;
        foreach (string field in record.Keys)
        {
            string trimmedField = field.Trim();
            string value = (string)record[field];

            // If the post doesn't exist, we need to import various stuff from the CSV row.
            if (useDatabase)
            {
                ApplyCsvFallbackField(post, trimmedField, value);
            }

            // Turn the comma separated list of topics into a sequence.
            if (trimmedField == "Codes Applied Combined")
            {
                post.Topics = value.Split(',').Select(c => c.Trim());
            }

            // Get the page from the post.
            if (trimmedField == "Page Name")
            {
                normalizedPageName = Mappings[value.Trim()].Name;
            }
        }

        // Get the nearest data we have for page likes at the time the post was created.
        Debug.Assert(normalizedPageName != null);
        PostScraper.UpdateMetadata(post, normalizedPageName);

        // Print the progress to make sure we know something is happening.
        numberSaved++;
        Console.WriteLine(numberSaved);

        // Save the post.
        posts.Add(PostScraper.Save(post, Refresh.False));
    });

    return(posts);
}

/// <summary>
/// Copies one CSV column onto a placeholder post (one that could not be scraped).
/// Columns other than the ones listed in the switch are ignored.
/// </summary>
private static void ApplyCsvFallbackField(ScrapedPost post, string trimmedField, string value)
{
    switch (trimmedField)
    {
        case "#_Post_Likes":
            post.Reactions = new Reactions { Summary = new ReactionsSummary { TotalCount = ParseCsvCount(value, "likes") } };
            break;

        case "#_Post_Comments":
            post.Comments = new Comments { Summary = new CommentsSummary { TotalCount = ParseCsvCount(value, "comments") } };
            break;

        case "#_Post_Shares":
            post.Shares = new Shares { Count = ParseCsvCount(value, "shares") };
            break;

        case "Post_Date":
        case "Excerpt Date":
            // Invariant culture: with a culture-specific provider the "/" in the format maps
            // to that culture's date separator, so the same file could fail on another machine.
            post.CreatedTime = DateTime.ParseExact(value, "M/d/yyyy", CultureInfo.InvariantCulture);
            break;

        case "Excerpt Copy":
            post.Message = value;
            break;
    }
}

/// <summary>
/// Parses a count cell from the CSV. Whole numbers may carry a decimal point in the data,
/// and some cells are empty or invalid; those are logged and reported as -1.
/// </summary>
private static int ParseCsvCount(string value, string description)
{
    // Invariant culture so the decimal point is always "." regardless of machine settings.
    if (int.TryParse(value, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out int count))
    {
        return count;
    }

    Console.WriteLine($"Cannot parse number of {description}. Skipping...");
    return -1;
}