Пример #1
0
        /// <summary>
        /// Imports posts described by the given CSV inputs. For every record, the post whose id is in
        /// the "Media Title" column is scraped via <c>PostScraper.ScrapePost</c>, enriched with data
        /// from the record, and saved via <c>PostScraper.Save</c>.
        /// </summary>
        /// <remarks>
        /// When <c>ScrapePost</c> returns null (the post no longer exists), a placeholder post is created
        /// and its like/comment/share counts, creation date and message are filled in from the CSV
        /// columns. Counts that cannot be parsed are stored as -1. Topics ("Codes Applied Combined")
        /// and the page name ("Page Name") are always read from the CSV.
        /// All numbers and dates are parsed with the invariant culture so the import behaves the same
        /// regardless of the machine's regional settings.
        /// </remarks>
        /// <param name="postCSVs">CSV inputs handed unchanged to <c>Read</c> (presumably file paths - confirm against <c>Read</c>).</param>
        /// <returns>The posts returned by <c>PostScraper.Save</c>, one for each record processed.</returns>
        /// <exception cref="FormatException">A "Post_Date"/"Excerpt Date" value of a deleted post is not in <c>M/d/yyyy</c> format.</exception>
        public IEnumerable <ScrapedPost> ImportPosts(IEnumerable <string> postCSVs)
        {
            var posts       = new List <ScrapedPost>();
            int numberSaved = 0;

            // Parses a count column. Whole numbers may be written with a decimal point ("12.0") and some
            // rows are empty or invalid; those are reported and mapped to -1.
            int ParseCount(string raw, string what)
            {
                if (int.TryParse(raw, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out int count))
                {
                    return count;
                }

                Console.WriteLine($"Cannot parse number of {what}. Skipping...");
                return -1;
            }

            Read(postCSVs, row =>
            {
                string postId = (string)row["Media Title"];

                // NOTE(review): posts that are already stored are intentionally NOT skipped (the skip was
                // disabled in the original code), so no lookup of the stored post is performed here.
                ScrapedPost post  = PostScraper.ScrapePost(postId);
                bool fillFromCsv = post == null;
                if (fillFromCsv)
                {
                    // Post has been deleted - we still want to save it, using what the CSV tells us.
                    post = new ScrapedPost {
                        Id = postId
                    };
                    Console.WriteLine($"Post {postId} does not exist.");
                }

                string normalizedPageName = null;
                foreach (string field in row.Keys)
                {
                    // Column headers may carry stray whitespace; the lookup itself needs the raw key.
                    string column = field.Trim();
                    string value  = (string)row[field];

                    switch (column)
                    {
                        // The guarded cases only apply when the post could not be scraped.
                        case "#_Post_Likes" when fillFromCsv:
                            post.Reactions = new Reactions
                            {
                                Summary = new ReactionsSummary {
                                    TotalCount = ParseCount(value, "likes")
                                }
                            };
                            break;

                        case "#_Post_Comments" when fillFromCsv:
                            post.Comments = new Comments
                            {
                                Summary = new CommentsSummary {
                                    TotalCount = ParseCount(value, "comments")
                                }
                            };
                            break;

                        case "#_Post_Shares" when fillFromCsv:
                            post.Shares = new Shares {
                                Count = ParseCount(value, "shares")
                            };
                            break;

                        case "Post_Date" when fillFromCsv:
                        case "Excerpt Date" when fillFromCsv:
                            // Invariant culture: "/" must be matched literally, not as the local date separator.
                            post.CreatedTime = DateTime.ParseExact(value, "M/d/yyyy", CultureInfo.InvariantCulture);
                            break;

                        case "Excerpt Copy" when fillFromCsv:
                            post.Message = value;
                            break;

                        case "Codes Applied Combined":
                            // Turn the comma separated list of topics into a sequence of trimmed topics.
                            post.Topics = value.Split(',').Select(topic => topic.Trim());
                            break;

                        case "Page Name":
                            // Map the page name in the CSV to its normalized name.
                            normalizedPageName = Mappings[value.Trim()].Name;
                            break;
                    }
                }

                // Every record is expected to carry a "Page Name" column.
                Debug.Assert(normalizedPageName != null);
                PostScraper.UpdateMetadata(post, normalizedPageName);

                // Print the progress to make sure we know something is happening.
                numberSaved++;
                Console.WriteLine(numberSaved);

                // Save the post.
                posts.Add(PostScraper.Save(post, Refresh.False));
            });

            return posts;
        }
 /// <summary>Fetches a post by delegating to <c>PostScraper.Get</c>.</summary>
 /// <param name="id">Post identifier, passed through unchanged.</param>
 /// <returns>Whatever <c>PostScraper.Get</c> returns for <paramref name="id"/>.</returns>
 public ScrapedPost GetPost(string id)
 {
     return PostScraper.Get(id);
 }