Esempio n. 1
0
        /// <summary>
        /// Parses the forum-formatted date string held in <c>post.PostDate</c> and stores the
        /// result, shifted to UTC, in <c>post.PostDateTimeUtc</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is written when the raw date is null, empty, contains "unknown", or when
        /// neither a recognised time zone abbreviation nor a known region identifies which
        /// date format to parse with.
        /// </remarks>
        /// <param name="post">The Post object to set the date/time of.</param>
        /// <param name="region">
        /// Region abbreviation ("EU" or "US"). Only consulted when the raw date carries no
        /// recognised time zone abbreviation.
        /// </param>
        /// <exception cref="FormatException">
        /// The date text cannot be parsed with the selected culture.
        /// </exception>
        private void SetPostDate(Post post, string region)
        {
            string dateToParse = post.PostDate;

            if (String.IsNullOrEmpty(dateToParse) || dateToParse.Contains("unknown"))
            {
                return;
            }

            string[] dateParts = dateToParse.Split(' ');
            int lastIndex = dateParts.Length - 1;
            string lastPart = dateParts[lastIndex];

            // The time zone is sometimes glued to the time by a whitespace character that is
            // not a plain U+0020 space (e.g. a non-breaking space), so Split(' ') leaves it
            // inside the last token. Any whitespace still present in a token is by
            // construction such a character; scanning for it avoids an invisible literal.
            int oddSpaceIndex = -1;
            for (int i = 0; i < lastPart.Length; i++)
            {
                if (Char.IsWhiteSpace(lastPart[i]))
                {
                    oddSpaceIndex = i;
                    break;
                }
            }

            string newDateRaw;
            if (oddSpaceIndex >= 0)
            {
                // Keep every token, but cut the last one just before the odd whitespace
                // (dropping the time zone text that follows it).
                dateParts[lastIndex] = lastPart.Remove(oddSpaceIndex);
                newDateRaw = String.Join(" ", dateParts);
            }
            else
            {
                // The last token is treated as the time zone abbreviation and dropped.
                newDateRaw = String.Join(" ", dateParts, 0, lastIndex);
            }

            // Choose the culture whose date format matches the text: the time zone
            // abbreviation wins, the region is the fallback.
            string cultureName;
            if (dateToParse.Contains("BST") || dateToParse.Contains("CEST") || dateToParse.Contains("CET") || dateToParse.Contains("GMT"))
            {
                cultureName = "fr-FR";
            }
            else if (dateToParse.Contains("PDT") || dateToParse.Contains("PST"))
            {
                cultureName = "en-US";
            }
            else if (region == "EU")
            {
                cultureName = "fr-FR";
            }
            else if (region == "US")
            {
                cultureName = "en-US";
            }
            else
            {
                // No way to tell the format. Leave PostDateTimeUtc untouched instead of
                // storing default(DateTime) (0001-01-01) as if it were a real timestamp.
                return;
            }

            DateTime postDate = DateTime.Parse(newDateRaw, CultureInfo.CreateSpecificCulture(cultureName));

            // Shift local forum time to UTC. GMT and dates without a recognised abbreviation
            // are stored as parsed.
            if (dateToParse.Contains("BST"))
            {
                postDate = postDate.AddHours(-1);
            }
            else if (dateToParse.Contains("CEST"))
            {
                postDate = postDate.AddHours(-2);
            }
            else if (dateToParse.Contains("CET"))
            {
                postDate = postDate.AddHours(-1);
            }
            else if (dateToParse.Contains("PST"))
            {
                postDate = postDate.AddHours(+8);
            }
            else if (dateToParse.Contains("PDT"))
            {
                postDate = postDate.AddHours(+7);
            }

            post.PostDateTimeUtc = postDate;
        }
Esempio n. 2
0
        /// <summary>
        /// Crawls the pages of a forum topic, stores (or refreshes) the posts that qualify
        /// for indexing, and then records the crawl result on the topic row.
        /// </summary>
        /// <remarks>
        /// Page 1 is always fetched; the remaining pages run from <c>topic.LastCrawledPage</c>
        /// (defaulted to 1 when unset) through <c>topic.NoPages</c>. A post is indexed when its
        /// markup contains <c>&lt;span id="1"&gt;</c>, or when its CSS class contains
        /// "blizzard" (poster type 2) or "mvp" (poster type 1).
        /// </remarks>
        /// <param name="topic">
        /// The topic to crawl. Its <c>LastCrawledPage</c> is set to 1 when it has no value.
        /// </param>
        private void CrawlSpecifiedTopics(Topic topic)
        {
            //We are going to make sure page 1 is always crawled.
            List<int> pageNos = new List<int>();
            pageNos.Add(1);
            if (!topic.LastCrawledPage.HasValue)
            {
                topic.LastCrawledPage = 1;
            }

            for (int i = topic.LastCrawledPage.Value; i <= topic.NoPages; i++)
            {
                if (i != 1)
                {
                    pageNos.Add(i);
                }
            }

            // URL segments are identifiers, so lower-case them culture-invariantly
            // (a culture-sensitive ToLower() can mangle letters such as 'I').
            var area = topic.ForumBoard.BlizzArea;
            string siteRoot = String.Format("http://{0}.battle.net/", area.Region.RegionAbbreviation.ToLowerInvariant());
            string topicUrl = String.Format("{0}{1}/{2}/forum/topic/{3}",
                                            siteRoot,
                                            area.Game.GameAbbreviation.ToLowerInvariant(),
                                            area.Language.LanguageAbbreviation.ToLowerInvariant(),
                                            topic.ThreadNumber);

            int lastCrawledPage = 0;

            foreach (int pageNo in pageNos)
            {
                string pageUrl = String.Format("{0}?page={1}", topicUrl, pageNo);

                HtmlDocument document = new HtmlDocument();
                document.LoadHtml(WebClient.GetRawHtml(pageUrl));

                // NOTE(review): this dereference presumably throws when the page has no
                // "post-list" element (e.g. an error page) - confirm whether callers rely
                // on that before softening it.
                var divTags = document.GetElementbyId("post-list").ChildNodes
                    .Where(x => x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("topic-post"));

                foreach (var div in divTags)
                {
                    if (!div.HasChildNodes)
                    {
                        continue;
                    }

                    // The Where() filter above guarantees the class attribute exists.
                    string divClass = div.Attributes["class"].Value;

                    bool indexThisPost = div.InnerHtml.Contains("<span id=\"1\">"); //incredibly fragile
                    short posterType = 0;

                    if (divClass.Contains("blizzard"))
                    {
                        indexThisPost = true;
                        posterType = 2;
                    }
                    else if (divClass.Contains("mvp"))
                    {
                        indexThisPost = true;
                        posterType = 1;
                    }

                    if (!indexThisPost)
                    {
                        continue;
                    }

                    // Every lookup below is relative to this post's element, e.g.
                    // //*[@id="post-64426546361"]/div/table/tr/td[2]/div
                    string postRoot = "//*[@id=\"" + div.Id + "\"]";

                    #region Post Content

                    string postContent = null;
                    var postContentTag = document.DocumentNode.SelectSingleNode(postRoot + "/div/table/tr/td[2]/div");

                    if (postContentTag != null)
                    {
                        postContent = postContentTag.InnerHtml ?? String.Empty;
                    }

                    #endregion

                    #region avatar

                    string avatarLink = null;
                    var avatarImgTag = document.DocumentNode.SelectSingleNode(postRoot + "/div/table/tr/td[1]/div/div[1]/div/div/a/img");

                    if (avatarImgTag != null)
                    {
                        // Read the attribute directly rather than slicing OuterHtml, which
                        // picked the wrong value whenever "src" was not the first quoted
                        // attribute in the tag.
                        var srcAttribute = avatarImgTag.Attributes["src"];

                        if (srcAttribute != null)
                        {
                            string src = srcAttribute.Value;

                            //TODO: Second condition here might conflict in other regions. Check for this.
                            if (!src.Contains("battle.net") && !src.Contains("cms/user_avatar") && !src.Contains("media.blizzard"))
                            {
                                src = siteRoot + src;
                            }

                            // The literal "{0}" / "{1}" arguments deliberately leave width and
                            // height as placeholders in the stored markup.
                            avatarLink = String.Format("<img alt=\"avatar\" src = \"{0}\" width={1} height={2}", src, "{0}", "{1} />");
                        }
                    }

                    #endregion

                    #region Direct Post Link

                    string linkDirectPost = null;
                    var indexATag = document.DocumentNode.SelectSingleNode(postRoot + "/div/table/tr/td[3]/div/a");

                    if (indexATag == null) //deleted MVP post
                    {
                        indexATag = document.DocumentNode.SelectSingleNode(postRoot + "/div[1]/table/tr/td[3]/div/a");
                    }

                    if (indexATag != null)
                    {
                        var hrefAttribute = indexATag.Attributes["href"];

                        if (hrefAttribute != null)
                        {
                            // Page URL followed directly by the anchor's href value.
                            linkDirectPost = pageUrl + hrefAttribute.Value;
                        }
                    }

                    #endregion

                    #region Post Date

                    string postDate = null;
                    var postDateDiv = document.DocumentNode.SelectSingleNode(postRoot + "/div/table/tr/td[3]/div/div[1]");

                    if (postDateDiv != null)
                    {
                        var tooltipAttribute = postDateDiv.Attributes["data-tooltip"];

                        if (tooltipAttribute != null)
                        {
                            postDate = tooltipAttribute.Value.Replace("&nbsp;", " ");
                        }
                    }

                    #endregion

                    #region Poster Name

                    string posterName = null;
                    var posterNameSpan = document.DocumentNode.SelectSingleNode(postRoot + "/div/table/tr/td[1]/div/div[2]/div[1]/a/span");

                    // Alternative layout, e.g.
                    // //*[@id="post-66887172203"]/div[1]/table/tr/td[1]/div/div/div/a/span
                    if (posterNameSpan == null)
                    {
                        posterNameSpan = document.DocumentNode.SelectSingleNode(postRoot + "/div[1]/table/tr/td[1]/div/div/div/a/span");
                    }

                    if (posterNameSpan != null)
                    {
                        posterName = posterNameSpan.InnerText;
                    }

                    #endregion

                    #region Poster Special Title

                    string posterSpecialTitle = null;

                    if (posterType == 2)
                    {
                        var posterSpecialTitleDiv = document.DocumentNode.SelectSingleNode(postRoot + "/div/table/tr/td[1]/div/div[2]/div[2]");

                        if (posterSpecialTitleDiv != null)
                        {
                            posterSpecialTitle = posterSpecialTitleDiv.InnerText;
                        }
                    }

                    #endregion

                    #region Post Edited

                    string postEdited = null;
                    var postEditedDiv = document.DocumentNode.SelectSingleNode(postRoot + "/div/table/tr/td[2]/div[2]");

                    if (postEditedDiv != null)
                    {
                        postEdited = postEditedDiv.OuterHtml;
                    }

                    #endregion

                    //Combining post edited with post content.
                    if (!String.IsNullOrEmpty(postEdited))
                    {
                        postContent += postEdited;
                    }

                    if (postContent != null)
                    {
                        postContent = postContent.Replace("<br>", "<br />");
                    }

                    // One context and one query decide between refreshing an existing row and
                    // inserting a new one (previously a Count() plus a second fetch).
                    using (ForumBlogsDataContext entities = new ForumBlogsDataContext(this._connStr))
                    {
                        Post existingPost = (from p in entities.Posts
                                             where p.DirectPostLink == linkDirectPost
                                             select p).FirstOrDefault();

                        if (existingPost != null)
                        {
                            if (existingPost.PostContent != postContent || existingPost.AvatarLinkOfPost != avatarLink)
                            {
                                existingPost.PostContent = postContent;
                                existingPost.AvatarLinkOfPost = avatarLink;
                                entities.SubmitChanges();
                            }
                        }
                        else if (postContent != null)
                        {
                            Post post = new Post();
                            post.TopicId = topic.Id;
                            post.DirectPostLink = linkDirectPost;
                            post.PosterName = posterName;
                            post.PostDate = postDate;
                            post.PosterType = posterType;
                            post.PosterSpecialTitle = posterSpecialTitle;
                            post.AvatarLinkOfPost = avatarLink;
                            post.PostContent = postContent;

                            try
                            {
                                SetPostDate(post, topic.ForumBoard.BlizzArea.Region.RegionAbbreviation);
                            }
                            catch (Exception)
                            {
                                // Deliberate best effort: an unparseable date (e.g. no time zone
                                // specified) must not stop the post from being stored, so the
                                // post is saved with whatever date fields SetPostDate left.
                            }

                            entities.Posts.InsertOnSubmit(post);
                            entities.SubmitChanges();
                        }
                    }
                }

                lastCrawledPage = pageNo;
            }

            using (ForumBlogsDataContext entities = new ForumBlogsDataContext(this._connStr))
            {
                Topic topicToSave = (from t in entities.Topics
                                     where t.Id == topic.Id
                                     select t).Single();

                // Only the newest dated blue post is needed. Rows whose date could not be
                // parsed have no PostDateTimeUtc and are skipped, so .Value below is safe.
                Post lastBlue = (from p in entities.Posts
                                 where p.TopicId == topicToSave.Id
                                 && p.PosterType == 2
                                 && p.PostDateTimeUtc != null
                                 orderby p.PostDateTimeUtc descending
                                 select p).FirstOrDefault();

                if (lastBlue != null)
                {
                    topicToSave.LastPostDate = lastBlue.PostDateTimeUtc.Value;
                }

                topicToSave.LastCrawledPage = lastCrawledPage;
                topicToSave.IsBeingCrawled = false;
                topicToSave.ToBeCrawled = false;
                entities.SubmitChanges();
            }
        }
 // Partial-method declarations for deleting, updating and inserting a Post. No
 // implementing bodies are visible in this excerpt.
 // NOTE(review): presumably LINQ to SQL extensibility hooks declared on the generated
 // data context - confirm against the designer file.
 partial void DeletePost(Post instance);
 partial void UpdatePost(Post instance);
 partial void InsertPost(Post instance);
 /// <summary>
 /// Signals a pending change to "Posts" through SendPropertyChanging and then clears
 /// the given post's <c>Topic</c> reference.
 /// </summary>
 /// <param name="entity">The post whose <c>Topic</c> reference is cleared.</param>
 private void detach_Posts(Post entity)
 {
     // Notify first, then break the link, in that order.
     SendPropertyChanging("Posts");
     entity.Topic = null;
 }
 /// <summary>
 /// Signals a pending change to "Posts" through SendPropertyChanging and then points
 /// the given post's <c>Topic</c> reference at this instance.
 /// </summary>
 /// <param name="entity">The post whose <c>Topic</c> reference is set to this instance.</param>
 private void attach_Posts(Post entity)
 {
     // Notify first, then establish the link, in that order.
     SendPropertyChanging("Posts");
     entity.Topic = this;
 }