コード例 #1
0
        /// <summary>
        /// Downloads the feed at <c>source.StreamUrl</c> and passes every item to
        /// <c>ReadFeedItem</c>. The stream is first loaded as RSS; if that raises a
        /// <see cref="FormatException"/> the URL is opened again and loaded as Atom.
        /// </summary>
        /// <param name="source">Feed source to read; its <c>LastUpdated</c> is stamped afterwards.</param>
        /// <param name="IsForTest">Forwarded unchanged to <c>ReadFeedItem</c>.</param>
        /// <returns>The number of items for which <c>ReadFeedItem</c> returned <c>true</c>.</returns>
        /// <remarks>
        /// Best effort: a failing item is recorded as a <c>FailedUrl</c> and the loop continues;
        /// any other failure is reported on standard error and the count gathered so far is returned.
        /// </remarks>
        private int ReadSource(Source source, bool? IsForTest)
        {
            var count = 0;
            try
            {
                // WebClient is IDisposable; dispose it together with the response stream.
                using (var client = new WebClient())
                using (var stream = client.OpenRead(source.StreamUrl))
                {
                    try
                    {
                        #region RssFeed
                        RssFeed feed = new RssFeed();
                        feed.Load(stream);
                        foreach (var i in feed.Channel.Items)
                        {
                            try
                            {
                                if (ReadFeedItem(i, source, IsForTest))
                                    count++;
                                // Pause between items, as the original code did.
                                Thread.Sleep(500);
                            }
                            catch (Exception e)
                            {
                                // A missing link must not throw from inside the handler,
                                // otherwise one bad item would abort the whole source.
                                RecordFailedUrl(i.Link == null ? null : i.Link.ToString(), e);
                            }
                        }
                        #endregion RssFeed
                    }
                    catch (FormatException)
                    {
                        #region AtomFeed
                        // The first stream has already been consumed by the RSS attempt,
                        // so the URL is opened a second time for the Atom parser.
                        AtomFeed afeed = new AtomFeed();
                        using (var astream = client.OpenRead(source.StreamUrl))
                        {
                            afeed.Load(astream);
                            foreach (var i in afeed.Entries)
                            {
                                try
                                {
                                    if (ReadFeedItem(i, source, IsForTest))
                                        count++;
                                    Thread.Sleep(500);
                                }
                                catch (Exception e)
                                {
                                    var firstLink = i.Links == null ? null : i.Links.FirstOrDefault();
                                    RecordFailedUrl(firstLink == null ? null : firstLink.ToString(), e);
                                }
                            }
                        }
                        #endregion AtomFeed
                    }

                    Source s = this.Data.Sources.FirstOrDefault(d => d.Id == source.Id);
                    if (s != null)
                    {
                        s.LastUpdated = DateTime.Now;
                        this.Data.Sources.Update(s);
                        this.Data.SaveChanges();
                    }
                }
            }
            catch (Exception e)
            {
                // Still best effort (the method never throws for a broken feed),
                // but the failure is no longer swallowed without a trace.
                Console.Error.WriteLine("Failed to read source '{0}': {1}",
                    source == null ? null : source.Name, e);
            }

            Console.WriteLine(source.Name);
            return count;
        }

        /// <summary>
        /// Persists a <c>FailedUrl</c> row describing an item that could not be imported.
        /// </summary>
        /// <param name="url">Link of the failing item; may be <c>null</c> when the item has none.</param>
        /// <param name="error">Exception raised while processing the item; its message is stored.</param>
        private void RecordFailedUrl(string url, Exception error)
        {
            var failed = new FailedUrl();
            failed.Url = url;
            failed.Exception = error.Message;
            this.Data.FailedUrls.Add(failed);
            this.Data.SaveChanges();
        }
コード例 #2
0
        /// <summary>
        /// Brings an already stored news item up to date for the given source: attaches the
        /// source's category when it is missing and, when the stored item has no content,
        /// parses the article again and stores the parsed content.
        /// </summary>
        /// <param name="url">Article URL handed to <c>ParseArticle</c>.</param>
        /// <param name="source">Source supplying <c>CategoryId</c> and <c>SourceWebsite.Name</c>.</param>
        /// <param name="oldItem">The previously stored item matched by the caller.</param>
        private void UpdateOldItem(string url, Source source, NewsItem oldItem)
        {
            if (!oldItem.Categories.Any(c => c.Id == source.CategoryId))
            {
                oldItem.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
                this.Data.SaveChanges();
            }

            var forUpdateArticle = this.Data.NewsItems.All().FirstOrDefault(s => s.Id == oldItem.Id);

            // The original test was `== null && IsNullOrEmpty(x.Content)`, which either threw a
            // NullReferenceException (item missing) or was false (item present), so the refresh
            // below could never run. Only an existing item without content needs re-parsing.
            if (forUpdateArticle == null || !string.IsNullOrEmpty(forUpdateArticle.Content))
            {
                return;
            }

            var parsed = ParseArticle(url, source.SourceWebsite.Name);
            if (parsed == null || string.IsNullOrEmpty(parsed.Content))
            {
                // Nothing usable was parsed; leave the stored item untouched.
                return;
            }

            // Copy the content onto the stored entity rather than passing the freshly parsed
            // (unsaved) object to Update, so the existing row is the one that is modified.
            // NOTE(review): only Content is refreshed here - confirm whether other parsed
            // fields should be copied as well.
            forUpdateArticle.Content = parsed.Content;
            this.Data.NewsItems.Update(forUpdateArticle);
            this.Data.SaveChanges();
        }
コード例 #3
0
        /// <summary>
        /// Imports a single Atom entry. An item with the same title published on the same date
        /// is refreshed through <c>UpdateOldItem</c>; otherwise the article is parsed and either
        /// stored as a new item or, when its <c>Href</c> is already stored, the stored item gets
        /// the source's category attached.
        /// </summary>
        /// <param name="i">Atom entry to import.</param>
        /// <param name="source">Source the entry belongs to.</param>
        /// <param name="IsForTest">Value stored on a new item; <c>null</c> is treated as <c>false</c>.</param>
        /// <returns><c>true</c> only when a new news item was added.</returns>
        /// <exception cref="Exception">
        /// The entry has no <c>text/html</c> link, or the article could not be parsed.
        /// </exception>
        private bool ReadFeedItem(AtomEntry i, Source source, bool? IsForTest)
        {
            var url = i.Links.FirstOrDefault(l => l.ContentType == "text/html");
            if (url == null || url.Uri == null)
            {
                // Every path below needs the article URL; fail with a clear message
                // instead of a NullReferenceException.
                throw new Exception("Atom entry has no text/html link");
            }

            var href = url.Uri.ToString();

            //Check if there is another article with the same title
            NewsItem oldItem = this.Data.NewsItems.FirstOrDefault(s => s.Title.Trim().ToLower() == i.Title.Content.Trim().ToLower());

            if (oldItem != null && new DateTime(oldItem.DatePublished).Date == i.UpdatedOn.Date)
            {
                UpdateOldItem(href, source, oldItem);

                return false;
            }

            //Parse the article
            // NOTE(review): unlike the RssItem overload, an empty article.Content is accepted
            // here and CleanContent is not populated - confirm this difference is intended.
            var article = ParseArticle(href, source.SourceWebsite.Name);
            if (article == null)
            {
                // Format the URL itself, not the link object.
                throw new Exception(string.Format("Connot Parse Article url : {0}", href));
            }

            if (string.IsNullOrEmpty(article.Href))
                article.Href = href;

            //Check if there is already an article
            var oldArticle = this.Data.NewsItems.All().FirstOrDefault(s => s.Href == article.Href);

            //If its new one
            if (oldArticle == null)
            {
                article.Media = source.SourceWebsite.Name;

                if (string.IsNullOrEmpty(article.Header))
                {
                    // Fall back to the feed title with any <img> tags stripped.
                    article.Header = Regex.Replace(i.Title.Content, @"<img\s[^>]*>(?:\s*?</img>)?", "", RegexOptions.IgnoreCase);
                }

                article.Title = article.Header;
                // A feed without a date yields DateTime.MinValue; use the import time instead.
                article.DatePublished = (i.UpdatedOn == DateTime.MinValue ? DateTime.Now : i.UpdatedOn).Ticks;
                article.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
                article.UsedForClassication = false;
                article.IsForTest = IsForTest ?? false;
                this.Data.NewsItems.Add(article);
                this.Data.SaveChanges();

                return true;
            }

            //if we already have it
            if (!oldArticle.Categories.Any(c => c.Id == source.CategoryId))
            {
                oldArticle.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
                // Update the stored entity that was just modified, not the freshly parsed copy.
                this.Data.NewsItems.Update(oldArticle);
                this.Data.SaveChanges();
            }

            return false;
        }
コード例 #4
0
        /// <summary>
        /// Imports a single RSS item. An item with the same title published on the same date
        /// is refreshed through <c>UpdateOldItem</c>; otherwise the article is parsed and either
        /// stored as a new item or, when its <c>Href</c> is already stored, the stored item gets
        /// the source's category attached.
        /// </summary>
        /// <param name="i">RSS item to import.</param>
        /// <param name="source">Source the item belongs to.</param>
        /// <param name="IsForTest">Value stored on a new item; <c>null</c> is treated as <c>false</c>.</param>
        /// <returns><c>true</c> only when a new news item was added.</returns>
        /// <exception cref="Exception">
        /// The item has no link, or the article could not be parsed or has no content.
        /// </exception>
        private bool ReadFeedItem(RssItem i, Source source, bool? IsForTest)
        {
            if (i.Link == null)
            {
                // Every path below needs the article URL; fail with a clear message
                // instead of a NullReferenceException.
                throw new Exception("RSS item has no link");
            }

            var link = i.Link.ToString();

            //Check if there is another article with the same title
            NewsItem oldItem = Data.NewsItems.All().FirstOrDefault(n => n.Title.Trim().ToLower() == i.Title.Trim().ToLower());

            if (oldItem != null && new DateTime(oldItem.DatePublished).Date == i.PublicationDate.Date)
            {
                UpdateOldItem(link, source, oldItem);
                return false;
            }

            //Parse the article
            var article = ParseArticle(link, source.SourceWebsite.Name);

            if (article == null || string.IsNullOrEmpty(article.Content))
            {
                throw new Exception(string.Format("Connot Parse Article url : {0}", link));
            }

            if (string.IsNullOrEmpty(article.Href))
                article.Href = link;

            //Check if there is already an article
            var oldArticle = this.Data.NewsItems.All().FirstOrDefault(s => s.Href == article.Href);

            //If its new one
            if (oldArticle == null)
            {
                article.Media = source.SourceWebsite.Name;

                if (string.IsNullOrEmpty(article.Header))
                {
                    // Fall back to the feed title with any <img> tags stripped.
                    article.Header = Regex.Replace(i.Title, @"<img\s[^>]*>(?:\s*?</img>)?", "", RegexOptions.IgnoreCase);
                }

                article.Title = article.Header;
                article.CleanContent = BaseHelper.ScrubHtml(article.Content);
                // A feed without a date yields DateTime.MinValue; use the import time instead.
                article.DatePublished = (i.PublicationDate == DateTime.MinValue ? DateTime.Now : i.PublicationDate).Ticks;
                article.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
                article.UsedForClassication = false;
                article.IsForTest = IsForTest ?? false;
                this.Data.NewsItems.Add(article);
                this.Data.SaveChanges();

                return true;
            }

            //if we already have it
            if (!oldArticle.Categories.Any(c => c.Id == source.CategoryId))
            {
                oldArticle.Categories.Add(this.Data.Categories.All().FirstOrDefault(s => s.Id == source.CategoryId));
                // Update the stored entity that was just modified, not the freshly parsed copy.
                this.Data.NewsItems.Update(oldArticle);
                this.Data.SaveChanges();
            }

            return false;
        }