/// <summary>
/// Handles a <see cref="BlogEntry"/> leaving the BlogEntries association: raises the
/// property-changing notification for "BlogEntries" and then clears the entry's
/// <c>BlizzArea</c> back-reference.
/// </summary>
/// <param name="entity">The blog entry whose <c>BlizzArea</c> reference is reset to null.</param>
private void detach_BlogEntries(BlogEntry entity)
{
    // Notify first, so the notification is raised before the relationship is modified.
    SendPropertyChanging("BlogEntries");

    // Drop the entry's reference to its area.
    entity.BlizzArea = null;
}
// Partial method declarations for BlogEntry persistence operations. No implementation is
// visible in this part of the file; a partial method without an implementing declaration
// is removed by the compiler together with its call sites.
// NOTE(review): these look like the LINQ to SQL designer's insert/update/delete
// extensibility hooks - confirm against the generated data context.

// Hook for deleting a BlogEntry.
partial void DeleteBlogEntry(BlogEntry instance);
// Hook for inserting a BlogEntry.
partial void InsertBlogEntry(BlogEntry instance);
// Hook for updating a BlogEntry.
partial void UpdateBlogEntry(BlogEntry instance);
/// <summary>
/// Downloads the battle.net news feed for the given game, region and language, and stores
/// every feed entry that is not yet present in the database.
/// </summary>
/// <remarks>
/// Entries are processed independently: a failure or an unrecognised page layout for one
/// entry is reported on the console and the remaining entries are still processed.
/// Failures while downloading or parsing the feed itself are not caught here.
/// </remarks>
/// <param name="game">Game abbreviation; lower-cased for the feed URL and matched against stored entries.</param>
/// <param name="region">Region abbreviation; lower-cased for the feed host name and matched against stored entries.</param>
/// <param name="lang">Language abbreviation; lower-cased for the feed URL and matched against stored entries.</param>
public void DoBlogCrawling(string game, string region, string lang)
{
    // Invariant lower-casing: URL parts must not depend on the current thread culture.
    string feedUrl = String.Format(
        "http://{0}.battle.net/{1}/{2}/feed/news",
        region.ToLowerInvariant(),
        game.ToLowerInvariant(),
        lang.ToLowerInvariant());

    List<MinedBlogEntry> blogEntries;

    // The reader owns the underlying response stream, so dispose it once the feed has been
    // fully materialised into the list.
    using (XmlReader reader = XmlReader.Create(feedUrl))
    {
        SyndicationFeed feed = SyndicationFeed.Load(reader);
        blogEntries = (from item in feed.Items
                       select new MinedBlogEntry
                       {
                           Title = item.Title.Text,
                           PublicationDate = ConvertFromDateTimeOffset(item.PublishDate.ToUniversalTime()),
                           DirectLink = item.Links[0].Uri.AbsoluteUri,
                           TimeZone = "UTC"
                       }).ToList();
    }

    foreach (var blogEntry in blogEntries)
    {
        try
        {
            this.MineBlogEntry(blogEntry, game, region, lang);
        }
        catch (Exception e)
        {
            // Best effort: report the failure and carry on with the next entry.
            Console.WriteLine("Could not mine blog entry for following reason: \n {0}", e.Message);
        }
    }
}

/// <summary>
/// Stores a single feed entry if no entry with the same number exists for the given
/// game, region and language.
/// </summary>
/// <param name="blogEntry">Feed entry to process; its anchor link and entry number are filled in here.</param>
/// <param name="game">Game abbreviation used for the existence check.</param>
/// <param name="region">Region abbreviation used for the existence check.</param>
/// <param name="lang">Language abbreviation used for the existence check.</param>
private void MineBlogEntry(MinedBlogEntry blogEntry, string game, string region, string lang)
{
    using (ForumBlogsDataContext entities = new ForumBlogsDataContext(this._connStr))
    {
        // NOTE(review): title and link come from the external feed and are inserted into
        // the anchor markup unescaped - confirm whether consumers expect HTML-encoded text.
        blogEntry.DirectLinkWithAnchor = String.Format("<a href = \"{0}\"> {1} </a>", blogEntry.DirectLink, blogEntry.Title);
        blogEntry.EntryNumber = ParseBlogEntryNumber(blogEntry.DirectLink);

        // Any() only asks whether a match exists instead of counting all matches.
        bool alreadyStored = (from b in entities.BlogEntries
                              where b.BlizzArea.Game.GameAbbreviation == game
                                 && b.BlizzArea.Region.RegionAbbreviation == region
                                 && b.BlizzArea.Language.LanguageAbbreviation == lang
                                 && b.EntryNumber == blogEntry.EntryNumber
                              select b).Any();

        if (alreadyStored)
        {
            Console.WriteLine("Found a blog entry. This blog entry already exists.");
            //TODO: We should probably check last update stamp.
            return;
        }

        Console.WriteLine("Found a new blog entry.");
        string blogEntryPage = WebClient.GetRawHtml(blogEntry.DirectLink);
        HtmlDocument document = new HtmlDocument();
        document.LoadHtml(blogEntryPage);

        string blogContent;
        string author;
        if (!TryExtractBlogContent(document, out blogContent, out author))
        {
            // Skip only this entry; one unexpected page must not block the rest of the feed.
            Console.WriteLine("Cannot mine this. The div tags are not in their expected format.");
            return;
        }

        blogContent = blogContent.Replace("<br>", "<br />");

        TimeSpan span = DateTime.UtcNow - blogEntry.PublicationDate;
        if (span.TotalMinutes > 60)
        {
            // NOTE(review): the flag is set on the mined object only; nothing below copies
            // it to the persisted BlogEntry - confirm whether it should be stored.
            blogEntry.IsTweeted = true;
        }

        BlogEntry newBlogEntry = new BlogEntry();
        newBlogEntry.BlizzAreaId = this.BlogPageId;
        newBlogEntry.BlogTitle = blogEntry.Title;
        newBlogEntry.BlogDirectLink = blogEntry.DirectLinkWithAnchor;
        newBlogEntry.BlogContent = blogContent;
        newBlogEntry.BlogDate = blogEntry.PublicationDate;
        newBlogEntry.TimeZone = "UTC";
        newBlogEntry.EntryNumber = blogEntry.EntryNumber;
        newBlogEntry.BlogEntryAuthor = author;

        entities.BlogEntries.InsertOnSubmit(newBlogEntry);
        entities.SubmitChanges();
    }
}

/// <summary>
/// Pulls the article HTML and the author name out of a downloaded blog page, trying the
/// two page layouts this crawler knows about.
/// </summary>
/// <remarks>
/// The content is taken from the page rather than from the feed because the banner image
/// is wanted as well. A missing author element yields an empty author instead of a failure.
/// </remarks>
/// <param name="document">Parsed blog page.</param>
/// <param name="blogContent">Receives the article HTML; empty when extraction fails.</param>
/// <param name="author">Receives the author text, or an empty string when it cannot be found.</param>
/// <returns><c>true</c> when a content element was found; otherwise <c>false</c>.</returns>
private static bool TryExtractBlogContent(HtmlDocument document, out string blogContent, out string author)
{
    blogContent = String.Empty;
    author = String.Empty;

    // First layout: //*[@id="blog"]/div[2] holds the article content.
    var contentDiv = document.DocumentNode.SelectSingleNode("//*[@id=\"blog\"]/div[2]");
    if (contentDiv != null)
    {
        blogContent = contentDiv.InnerHtml;

        var authorSpan = document.DocumentNode.SelectSingleNode("//*[@id=\"blog\"]/div[1]/a[1]/span[2]");
        if (authorSpan != null)
        {
            author = authorSpan.InnerText;
        }

        return true;
    }

    // Second layout: everything sits one div deeper, with a separate heading image block.
    var headingImage = document.DocumentNode.SelectSingleNode("//*[@id=\"blog\"]/div/div[2]");
    contentDiv = document.DocumentNode.SelectSingleNode("//*[@id=\"blog\"]/div/div[3]");
    if (contentDiv == null)
    {
        return false;
    }

    var authorLink = document.DocumentNode.SelectSingleNode("//*[@id=\"blog\"]/div/div[1]/div/a");
    if (authorLink != null)
    {
        author = authorLink.InnerText;
    }

    if (headingImage != null)
    {
        blogContent += headingImage.OuterHtml;
    }

    blogContent += contentDiv.InnerHtml;
    return true;
}

/// <summary>
/// Reads the numeric entry id that follows the first "blog/" segment of a blog link.
/// Anything after the digits (for example a trailing path segment) is ignored.
/// </summary>
/// <param name="directLink">Absolute link of the blog entry.</param>
/// <returns>The entry number found in the link.</returns>
/// <exception cref="FormatException">The link has no "blog/" segment or no digits after it.</exception>
/// <exception cref="OverflowException">The digits do not fit into an <see cref="int"/>.</exception>
private static int ParseBlogEntryNumber(string directLink)
{
    const string marker = "blog/";

    int markerIndex = directLink.IndexOf(marker, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        throw new FormatException(String.Format("Blog link has no \"{0}\" segment: {1}", marker, directLink));
    }

    int start = markerIndex + marker.Length;
    int end = start;
    while (end < directLink.Length && directLink[end] >= '0' && directLink[end] <= '9')
    {
        end++;
    }

    // An empty digit run makes int.Parse throw FormatException, as the original did for
    // links it could not parse.
    return int.Parse(
        directLink.Substring(start, end - start),
        System.Globalization.NumberStyles.None,
        System.Globalization.CultureInfo.InvariantCulture);
}