Esempio n. 1
        /// <summary>
        /// Downloads the RSS feed of every blog returned by <c>GetBlogs()</c>, reads the channel and item
        /// elements, filters items against a fixed keyword list and builds <c>BlogRssItem</c> objects,
        /// reporting progress and messages through <paramref name="context"/>.
        /// </summary>
        /// <param name="context">Job context used here to create a progress bar and to write log lines.</param>
        /// <returns>The collected posts as an array, ordered by <c>PublishedDate</c> with the newest first.</returns>
        /// <remarks>
        /// NOTE(review): this excerpt appears to have lost its brace-only lines and a few statements
        /// (the <c>try</c> paired with the <c>catch</c> below, the statements guarded by several bare
        /// <c>if</c> lines, and whatever adds items to <c>posts</c>). Comments below only describe what is
        /// visible; confirm the rest against the complete file.
        /// </remarks>
        public BlogRssItem[] GetBlogPosts(PerformContext context)
            var posts = new List <BlogRssItem>();

            // Progress is reported per blog through the progress bar written to the job context
            var progressBar = context.WriteProgressBar();
            var blogs       = GetBlogs();

            foreach (var blog in blogs.WithProgress(progressBar, blogs.Length))
                    // NOTE(review): the catch at the end of this loop has no visible try; presumably it opened here
                    string raw;
                    context.WriteLine($"Processing blog {blog.Title}");
                    // Initialize a new web client (with the encoding specified for the blog)
                    using (var wc = new WebClient())
                        wc.Encoding = blog.Encoding;

                        // Download the raw XML
                        raw = wc.DownloadString(blog.RssUrl);
                        // Pass the text through RemoveLeadingCharacters (helper not visible here) and rename
                        // "a10:updated" to "pubDate" - presumably so GetPublishDate can read it; TODO confirm
                        raw = RemoveLeadingCharacters(raw).Replace("a10:updated", "pubDate");
                    // Parse the XML into a new instance of XElement
                    var feed = XElement.Parse(raw);

                    // Read the channel metadata. Only title and link are used in the visible code;
                    // channelDescription, channelLastBuildDate and channelLangauge (sic) are read but not used here.
                    var channel              = feed.Element("channel");
                    var channelTitle         = channel.GetElementValue("title");
                    var channelLink          = channel.GetElementValue("link");
                    var channelDescription   = channel.GetElementValue("description");
                    var channelLastBuildDate = channel.GetElementValue("lastBuildDate");
                    var channelLangauge      = channel.GetElementValue("language");

                    // One channel object per blog; it is assigned to every item built from this feed
                    var rssChannel = new BlogRssChannel
                        Id    = blog.Id,
                        Title = channelTitle,
                        Link  = channelLink

                    var items = channel.GetElements("item");
                    foreach (var item in items)
                        var title = item.GetElementValue("title");
                        // Use the "link" element when it has a value, otherwise fall back to "guid"
                        var link  = (string.IsNullOrEmpty(item.GetElementValue("link"))
                            ? item.GetElementValue("guid")
                            : item.GetElementValue("link"))

                        var pubDate = GetPublishDate(item);
                        // Presumably default(DateTimeOffset) means no usable date was found - confirm in GetPublishDate.
                        // NOTE(review): the statement guarded by this if is not visible in this excerpt.
                        if (pubDate == default(DateTimeOffset))

                        // Keywords (already lower-case) that are matched as substrings of titles and categories below
                        var approvedCategories = new List <string> {
                            "umbraco", "codegarden", "articulate", "examine"
                        var categories = item.GetElements("category");
                        // Category filtering only applies to items that carry at least one category element
                        if (categories.Any())
                            // Start from the title check (ContainsAny is defined elsewhere; presumably true when
                            // the lower-cased title contains any of the keywords), then look at each category
                            var includeItem = title.ToLowerInvariant().ContainsAny(approvedCategories);
                            foreach (var category in categories)
                                // no need to check more if the item is already approved
                                // NOTE(review): the statement guarded by this if is not visible in this excerpt.
                                if (includeItem)

                                // Case-insensitive substring match of the category text against each keyword
                                foreach (var approvedCategory in approvedCategories)
                                    if (category.Value.ToLowerInvariant().Contains(approvedCategory.ToLowerInvariant()))
                                        includeItem = true;

                            // Report rejected items together with the categories they did have.
                            // NOTE(review): whether the item is then skipped is not visible in this excerpt.
                            if (includeItem == false)
                                var allCategories = string.Join(",", categories.Select(i => i.Value));
                                context.WriteLine($"Not including post titled {title} because it was not in an approved category. The categories it was found in: {allCategories}. [{link}]");

                        // Blog has no category info and posts things unrelated to Umbraco, check there's related keywords in the title
                        if (blog.CheckTitles)
                            var includeItem = false;
                            foreach (var approvedCategory in approvedCategories)
                                if (title.ToLowerInvariant().Contains(approvedCategory.ToLowerInvariant()))
                                    includeItem = true;

                            // Blog post seems unrelated to Umbraco, skip it
                            // NOTE(review): the skipping statement itself is not visible in this excerpt.
                            if (includeItem == false)

                        // NOTE(review): no statement adding blogPost to posts is visible in this excerpt.
                        var blogPost = new BlogRssItem
                            Channel = rssChannel,
                            Title   = title,
                            // some sites store the link in the <guid/> element
                            Link          = link,
                            PublishedDate = pubDate

                // Failures are caught per blog and written to the job context; no rethrow is visible.
                // NOTE(review): ex is passed as a second argument although the message has no visible placeholder -
                // confirm which WriteLine overload this binds to and that the exception details are actually written.
                catch (Exception ex)
                    context.WriteLine("Unable to get blog posts for: " + blog.RssUrl, ex);

            // Newest posts first
            return(posts.OrderByDescending(x => x.PublishedDate).ToArray());
Esempio n. 2
        /// <summary>
        /// Downloads the RSS feed of every blog returned by <c>GetBlogs()</c> (sending a browser-like
        /// User-Agent header), filters the feed items against a fixed keyword list, builds
        /// <c>BlogRssItem</c> objects and downloads each blog's logo to a path under <c>~/media/blogs/</c>.
        /// Progress and messages are reported through <paramref name="context"/>.
        /// </summary>
        /// <param name="context">Job context used here to create a progress bar and to write log lines.</param>
        /// <returns>The collected posts as an array, ordered by <c>PublishedDate</c> with the newest first.</returns>
        /// <remarks>
        /// NOTE(review): this excerpt appears to have lost its brace-only lines and a few statements
        /// (the <c>try</c> paired with the <c>catch</c> below, the statements guarded by several bare
        /// <c>if</c> lines, and whatever adds items to <c>posts</c>). Comments below only describe what is
        /// visible; confirm the rest against the complete file.
        /// </remarks>
        public BlogRssItem[] GetBlogPosts(PerformContext context)
            var posts = new List <BlogRssItem>();

            // Progress is reported per blog through the progress bar written to the job context
            var progressBar = context.WriteProgressBar();
            var blogs       = GetBlogs();

            foreach (var blog in blogs.WithProgress(progressBar, blogs.Length))
                    // NOTE(review): the catch at the end of this loop has no visible try; presumably it opened here
                    string       raw;
                    // Sent with both the feed request and the logo request below;
                    // presumably some hosts reject requests without a browser-like User-Agent - TODO confirm
                    const string userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3393.4 Safari/537.36";
                    context.WriteLine($"Processing blog {blog.Title}");
                    // Initialize a new web client (with the encoding specified for the blog)
                    using (var wc = new WebClient())
                        wc.Headers.Add(HttpRequestHeader.UserAgent, userAgent);
                        wc.Encoding = blog.Encoding;

                        // Download the raw XML
                        raw = wc.DownloadString(blog.RssUrl);
                        // Pass the text through RemoveLeadingCharacters (helper not visible here) and rename
                        // "a10:updated" to "pubDate" - presumably so GetPublishDate can read it; TODO confirm
                        raw = RemoveLeadingCharacters(raw).Replace("a10:updated", "pubDate");
                    // Parse the XML into a new instance of XElement
                    var feed = XElement.Parse(raw);

                    // Read the channel metadata. Only title and link are used in the visible code;
                    // channelDescription, channelLastBuildDate and channelLangauge (sic) are read but not used here.
                    var channel              = feed.Element("channel");
                    var channelTitle         = channel.GetElementValue("title");
                    var channelLink          = channel.GetElementValue("link");
                    var channelDescription   = channel.GetElementValue("description");
                    var channelLastBuildDate = channel.GetElementValue("lastBuildDate");
                    var channelLangauge      = channel.GetElementValue("language");

                    // One channel object per blog; it is assigned to every item built from this feed
                    var rssChannel = new BlogRssChannel
                        Id    = blog.Id,
                        Title = channelTitle,
                        Link  = channelLink

                    var items = channel.GetElements("item");
                    foreach (var item in items)
                        var title = item.GetElementValue("title");
                        // Use the "link" element when it has a value, otherwise fall back to "guid"
                        var link  = (string.IsNullOrEmpty(item.GetElementValue("link"))
                            ? item.GetElementValue("guid")
                            : item.GetElementValue("link"))

                        var pubDate = GetPublishDate(item);
                        // Presumably default(DateTimeOffset) means no usable date was found - confirm in GetPublishDate.
                        // NOTE(review): the statement guarded by this if is not visible in this excerpt.
                        if (pubDate == default(DateTimeOffset))

                        // Keywords (already lower-case) that are matched as substrings of titles and categories below
                        var approvedCategories = new List <string> {
                            "umbraco", "codegarden", "articulate", "examine"
                        var categories = item.GetElements("category");
                        // Category filtering only applies to items that carry at least one category element
                        if (categories.Any())
                            // Start from the title check (ContainsAny is defined elsewhere; presumably true when
                            // the lower-cased title contains any of the keywords), then look at each category
                            var includeItem = title.ToLowerInvariant().ContainsAny(approvedCategories);
                            foreach (var category in categories)
                                // no need to check more if the item is already approved
                                // NOTE(review): the statement guarded by this if is not visible in this excerpt.
                                if (includeItem)

                                // Case-insensitive substring match of the category text against each keyword
                                foreach (var approvedCategory in approvedCategories)
                                    if (category.Value.ToLowerInvariant().Contains(approvedCategory.ToLowerInvariant()))
                                        includeItem = true;

                            // Report rejected items together with the categories they did have.
                            // NOTE(review): whether the item is then skipped is not visible in this excerpt.
                            if (includeItem == false)
                                var allCategories = string.Join(",", categories.Select(i => i.Value));
                                context.WriteLine($"Not including post titled {title} because it was not in an approved category. The categories it was found in: {allCategories}. [{link}]");

                        // Blog has no category info and posts things unrelated to Umbraco, check there's related keywords in the title
                        if (blog.CheckTitles)
                            var includeItem = false;
                            foreach (var approvedCategory in approvedCategories)
                                if (title.ToLowerInvariant().Contains(approvedCategory.ToLowerInvariant()))
                                    includeItem = true;

                            // Blog post seems unrelated to Umbraco, skip it
                            // NOTE(review): the skipping statement itself is not visible in this excerpt.
                            if (includeItem == false)

                        // NOTE(review): no statement adding blogPost to posts is visible in this excerpt.
                        var blogPost = new BlogRssItem
                            Channel = rssChannel,
                            Title   = title,
                            // some sites store the link in the <guid/> element
                            Link          = link,
                            PublishedDate = pubDate


                    // Get the avatar locally so that we can use ImageProcessor and serve it over https
                    // NOTE(review): this download sits before the catch below, so a logo failure would presumably
                    // be reported with the "Unable to get blog posts" message - confirm against the full file.
                    using (var wc = new WebClient())
                        wc.Headers.Add(HttpRequestHeader.UserAgent, userAgent);
                        var baseLogoPath = HostingEnvironment.MapPath("~/media/blogs/");
                        // NOTE(review): the statement guarded by this if is not visible; presumably it creates the folder
                        if (Directory.Exists(baseLogoPath) == false)

                        // Target file is <mapped folder><blog id><extension>; GetFileExtension is defined elsewhere
                        // (presumably its result includes the leading dot - TODO confirm)
                        var logoExtension = GetFileExtension(blog.LogoUrl);
                        var logoPath      = baseLogoPath + blog.Id + logoExtension;

                        wc.DownloadFile(blog.LogoUrl, logoPath);
                // Failures are caught per blog and written to the job context; no rethrow is visible.
                // NOTE(review): ex is passed as a second argument although the message has no visible placeholder -
                // confirm which WriteLine overload this binds to and that the exception details are actually written.
                catch (Exception ex)
                    context.WriteLine("Unable to get blog posts for: " + blog.RssUrl, ex);

            // Newest posts first
            return(posts.OrderByDescending(x => x.PublishedDate).ToArray());
Esempio n. 3
        /// <summary>
        /// Downloads the RSS feed of every blog returned by <c>GetBlogs()</c>, filters the feed items
        /// against a fixed keyword list and builds <c>BlogRssItem</c> objects. Rejected items and
        /// failures are written through <c>LogHelper</c>.
        /// </summary>
        /// <returns>The collected posts as an array, ordered by <c>PublishedDate</c> with the newest first.</returns>
        /// <remarks>
        /// NOTE(review): this excerpt appears to have lost its brace-only lines and a few statements
        /// (the <c>try</c> paired with the <c>catch</c> below, the statements guarded by several bare
        /// <c>if</c> lines, and whatever adds items to <c>posts</c>). Comments below only describe what is
        /// visible; confirm the rest against the complete file.
        /// </remarks>
        public BlogRssItem[] GetBlogPosts()
            var posts = new List <BlogRssItem>();

            foreach (var blog in GetBlogs())
                    // NOTE(review): the catch at the end of this loop has no visible try; presumably it opened here
                    string raw;

                    // Need to make sure we try TLS 1.2 first, else the connection will just be closed on us.
                    // No other protocols are allowed: SSL and TLS 1.0 are considered insecure.
                    // Note that this assigns a static property, and does so again for every blog in the loop.
                    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11;

                    // Initialize a new web client (with the encoding specified for the blog)
                    using (var wc = new WebClient())
                        wc.Encoding = blog.Encoding;

                        // Download the raw XML
                        raw = wc.DownloadString(blog.RssUrl);
                        // Pass the text through RemoveLeadingCharacters (helper not visible here) and rename
                        // "a10:updated" to "pubDate" - presumably so GetPublishDate can read it; TODO confirm
                        raw = RemoveLeadingCharacters(raw).Replace("a10:updated", "pubDate");
                    // Parse the XML into a new instance of XElement
                    var feed = XElement.Parse(raw);

                    // Read the channel metadata. Only title and link are used in the visible code;
                    // channelDescription, channelLastBuildDate and channelLangauge (sic) are read but not used here.
                    var channel              = feed.Element("channel");
                    var channelTitle         = channel.GetElementValue("title");
                    var channelLink          = channel.GetElementValue("link");
                    var channelDescription   = channel.GetElementValue("description");
                    var channelLastBuildDate = channel.GetElementValue("lastBuildDate");
                    var channelLangauge      = channel.GetElementValue("language");

                    // One channel object per blog; it is assigned to every item built from this feed
                    var rssChannel = new BlogRssChannel
                        Id    = blog.Id,
                        Title = channelTitle,
                        Link  = channelLink

                    foreach (var item in channel.GetElements("item"))
                        var title = item.GetElementValue("title");
                        // Use the "link" element when it has a value, otherwise fall back to "guid"
                        var link  = (string.IsNullOrEmpty(item.GetElementValue("link"))
                            ? item.GetElementValue("guid")
                            : item.GetElementValue("link"))

                        var pubDate = GetPublishDate(item);
                        // Presumably default(DateTimeOffset) means no usable date was found - confirm in GetPublishDate.
                        // NOTE(review): the statement guarded by this if is not visible in this excerpt.
                        if (pubDate == default(DateTimeOffset))

                        // Keywords (already lower-case) that are matched as substrings of titles and categories below
                        var approvedCategories = new List <string> {
                            "umbraco", "codegarden", "articulate", "examine"
                        var categories = item.GetElements("category");
                        // Category filtering only applies to items that carry at least one category element
                        if (categories.Any())
                            // Start from the title check (ContainsAny is defined elsewhere; presumably true when
                            // the lower-cased title contains any of the keywords), then look at each category
                            var includeItem = title.ToLowerInvariant().ContainsAny(approvedCategories);
                            foreach (var category in categories)
                                // no need to check more if the item is already approved
                                // NOTE(review): the statement guarded by this if is not visible in this excerpt.
                                if (includeItem)

                                // Case-insensitive substring match of the category text against each keyword
                                foreach (var approvedCategory in approvedCategories)
                                    if (category.Value.ToLowerInvariant().Contains(approvedCategory.ToLowerInvariant()))
                                        includeItem = true;

                            // Log rejected items together with the categories they did have.
                            // NOTE(review): whether the item is then skipped is not visible in this excerpt.
                            if (includeItem == false)
                                var allCategories = string.Join(",", categories.Select(i => i.Value));
                                LogHelper.Info <BlogPostsService>(string.Format("Not including post titled {0} because it was not in an approved category. The categories it was found in: {1}. [{2}]", title, allCategories, link));

                        // NOTE(review): no statement adding blogPost to posts is visible in this excerpt.
                        var blogPost = new BlogRssItem
                            Channel = rssChannel,
                            Title   = title,
                            // some sites store the link in the <guid/> element
                            Link          = link,
                            PublishedDate = pubDate

                // Failures are caught per blog and logged with the feed URL and the exception; no rethrow is visible
                catch (Exception ex)
                    LogHelper.Error <BlogPostsService>("Unable to get blog posts for: " + blog.RssUrl, ex);

            // Newest posts first
            return(posts.OrderByDescending(x => x.PublishedDate).ToArray());