/// <summary>
/// Item-data-bound handler for the RSS feed control. For regular and alternating
/// rows it inspects the bound item's description: the known error markers are
/// forwarded to <c>DisplayErrorMessage</c>, and the "no log entries" marker hides
/// the row's category image.
/// </summary>
/// <param name="sender">The control raising the event (not used).</param>
/// <param name="e">Event data carrying the feed item being bound.</param>
protected void ucRssFeed_ItemDataBound(object sender, RssFeedItemEventArgs e)
    {
        bool isDataRow = (e.Item.ItemType == RssFeedItemType.Item) ||
                         (e.Item.ItemType == RssFeedItemType.AlternatingItem);

        // Any other row type is left untouched
        if (!isDataRow)
        {
            return;
        }

        RssFeedItem boundItem = e.Item;

        switch (boundItem.DataItem.Description)
        {
        case RSSConst.NO_UPDATES:
            DisplayErrorMessage(RSSConst.RSSFeedType.NO_UPDATES);
            break;

        case RSSConst.INCORRECT_FORMAT:
            DisplayErrorMessage(RSSConst.RSSFeedType.INCORRECT_FORMAT);
            break;

        case RSSConst.ORG_CONN_STR_NOT_FOUND:
            DisplayErrorMessage(RSSConst.RSSFeedType.ORG_CONN_STR_NOT_FOUND);
            break;

        case RSSConst.QUERY_STRING_PARAMETERS_NOT_DEFINED:
            DisplayErrorMessage(RSSConst.RSSFeedType.QUERY_STRING_PARAMETERS_NOT_DEFINED);
            break;

        case RSSConst.NO_LOG_ENTRIES:
            // Hide the category image, if the row template contains one
            Image categoryImage = (Image)e.Item.FindControl("imgCategory");
            if (categoryImage != null)
            {
                categoryImage.Visible = false;
            }
            break;
        }
    }
        /// <summary>
        /// Builds the two feed snapshots shared by the tests. Both snapshots hold the
        /// same four item instances (items 1-4); <c>preFeed</c> additionally holds
        /// item 5 and <c>postFeed</c> additionally holds item 6.
        /// </summary>
        public SimpleCraigslistListenerTest()
        {
            // Only the items that actually end up in a feed are created; the same
            // instances are added to both feeds on purpose.
            RssFeedItem item1 = new RssFeedItem() { Title = "test item 1", Description = "first item", Link = "phoenix.craigslist.org/1", PublishDate = new DateTime(2012, 1, 1) };
            RssFeedItem item2 = new RssFeedItem() { Title = "test item 2", Description = "second item", Link = "phoenix.craigslist.org/2", PublishDate = new DateTime(2012, 2, 1) };
            RssFeedItem item3 = new RssFeedItem() { Title = "test item 3", Description = "third item", Link = "phoenix.craigslist.org/3", PublishDate = new DateTime(2012, 3, 1) };
            RssFeedItem item4 = new RssFeedItem() { Title = "test item 4", Description = "fourth item", Link = "phoenix.craigslist.org/4", PublishDate = new DateTime(2012, 4, 1) };
            RssFeedItem item5 = new RssFeedItem() { Title = "test item 5", Description = "fifth item", Link = "phoenix.craigslist.org/5", PublishDate = new DateTime(2012, 5, 1) };
            RssFeedItem item6 = new RssFeedItem() { Title = "test item 6", Description = "6 item", Link = "phoenix.craigslist.org/6", PublishDate = new DateTime(2012, 6, 1) };

            // Snapshot taken "before": items 1-5
            preFeed.Add(item1);
            preFeed.Add(item2);
            preFeed.Add(item3);
            preFeed.Add(item4);
            preFeed.Add(item5);

            // Snapshot taken "after": item 5 is gone, item 6 is new
            postFeed.Add(item1);
            postFeed.Add(item2);
            postFeed.Add(item3);
            postFeed.Add(item4);
            postFeed.Add(item6);
        }
Example #3
0
    /// <summary>
    /// Parses the downloaded article for <paramref name="item"/> and saves it to the
    /// crawler repository.
    /// </summary>
    /// <param name="feed">Feed the item belongs to; supplies the retention period passed to the repository.</param>
    /// <param name="item">Feed item to parse and save.</param>
    /// <returns>
    /// <c>true</c> when the item was parsed and saved; <c>false</c> when the URL is missing or
    /// not absolute, points at a home page, or parsing/saving threw.
    /// </returns>
    private bool TryParseAndSave(RssFeed feed, RssFeedItem item)
    {
        string rawUrl = item.HtmlAttributes.GetValueOrDefault("Url") ?? item.FeedAttributes.Url;

        // A missing or relative URL used to escape as an exception from the Uri
        // constructor; treat it like every other failure in this method instead.
        Uri uri;
        if (!Uri.TryCreate(rawUrl, UriKind.Absolute, out uri))
        {
            Log.Error("PARSE_ERROR: UrlHash '{urlHash}': invalid or missing URL '{url}'", item.FeedAttributes.UrlHash, rawUrl);
            return(false);
        }

        if (uri.AbsolutePath == "/" && string.IsNullOrEmpty(uri.Query))
        {
            Log.Information("URI '{uri}' detected as a home page rather than an article, skipping parse operation", uri);
            return(false);
        }

        try
        {
            // Parse the downloaded file as dictated by the site parsing definitions
            _articleParser.Parse(item);
        }
        catch (Exception ex)
        {
            Log.Error(ex, "PARSE_ERROR: UrlHash '{urlHash}':'{url}'", item.FeedAttributes.UrlHash, item.FeedAttributes.Url);
            return(false);
        }

        try
        {
            _crawlerRepository.SaveDocument <RssFeedItem>(_crawlerCollectionName, item, feed.DatabaseRetentionDays, "", null, "");

            return(true);
        }
        catch (Exception ex)
        {
            Log.Error(ex, "SAVE_ERROR: UrlHash '{urlHash}':'{url}'", item.FeedAttributes.UrlHash, item.FeedAttributes.Url);
        }

        return(false);
    }
Example #4
0
 /// <summary>
 /// Opens the selected feed item's link; does nothing when no item is supplied.
 /// </summary>
 /// <param name="item">The selected feed item, or null.</param>
 private void OnItemSelected(RssFeedItem item)
 {
     if (item == null)
     {
         return;
     }

     Device.OpenUri(item.Link);
 }
Example #5
0
        /// <summary>
        /// Downloads the RSS feed at <c>RssFeedUri</c> and converts each <c>item</c>
        /// element into a <c>RssFeedItem</c>. Elements that fail to parse are logged
        /// as warnings and left out of the result.
        /// </summary>
        /// <returns>The feed items that parsed successfully.</returns>
        private async Task <IEnumerable <RssFeedItem> > GetItemsFromFeed()
        {
            // Fetch the feed and load the response body as XML
            var response     = await RequestWithCookiesAndRetryAsync(RssFeedUri);
            var feedDocument = new XmlDocument();

            feedDocument.LoadXml(response.ContentString);

            // Convert every <item> element
            var parsedItems = new List <RssFeedItem>();

            foreach (XmlNode itemNode in feedDocument.GetElementsByTagName("item"))
            {
                RssFeedItem parsed;

                if (!RssFeedItem.TryParse(itemNode, out parsed))
                {
                    logger.Warn($"Could not parse {DisplayName} RSS item '{itemNode.InnerText}'");
                    continue;
                }

                parsedItems.Add(parsed);
            }

            return parsedItems;
        }
        /// <summary>
        /// Builds a notification subject and HTML body from a sample feed item and
        /// hands them to the mail utility.
        /// </summary>
        public void SendEmail()
        {
            RssFeedItem sampleItem = new RssFeedItem()
            {
                Title       = "test item 6",
                Description = "6 item",
                Link        = "phoenix.craigslist.org/6",
                PublishDate = new DateTime(2012, 06, 01)
            };

            string subject = "Craigslist notification, a new post detected - " + sampleItem.Link;

            // Anchor pointing at the post, followed by the post description
            string anchor = "<a href=\"" + sampleItem.Link + "\">" + sampleItem.Title + "</a>";
            string body   = "A new item has been detected!! <br><br> " + anchor + "<br>" + sampleItem.Description;

            PK.SimpleCraigslistListener.Utilities.Utilities.SendMail(subject, body);
            //SimpleCraigslistListener.Program.SendMail(sampleItem, "custom body");
        }
 /// <summary>
 /// Checks that the first element of <c>postFeed.Except(preFeed)</c> carries the
 /// link, description and publish date of item 6.
 /// </summary>
 public void CompareRssFeedItem()
 {
     // Expected values
     RssFeedItem expected = new RssFeedItem()
     {
         Title       = "test item 6",
         Description = "6 item",
         Link        = "phoenix.craigslist.org/6",
         PublishDate = new DateTime(2012, 06, 01)
     };

     // Items present in the post feed but not in the pre feed
     List<RssFeedItem> onlyInPostFeed = postFeed.Except(preFeed).ToList();
     RssFeedItem actual = onlyInPostFeed.First();

     Assert.AreEqual(expected.Link, actual.Link);
     Assert.AreEqual(expected.Description, actual.Description);
     Assert.AreEqual(expected.PublishDate, actual.PublishDate);
 }
Example #8
0
        /// <summary>
        /// A feed item built from an element whose description is empty must expose
        /// an empty description.
        /// </summary>
        public void RemovingHtmlCanHandleEmptyDescription()
        {
            // Arrange: blank out the description of the first sample item
            var itemElement = XDocumentFactory.CreateSampleFeed().Descendants("item").First();
            itemElement.Element("description").Value = string.Empty;

            // Act
            var feedItem = new RssFeedItem(itemElement);

            // Assert
            Assert.AreEqual(string.Empty, feedItem.Description);
        }
Example #9
0
        /// <summary>
        /// A feed item built from an element whose description contains markup must
        /// expose the description with the tags removed.
        /// </summary>
        public void CanRemoveHtmlFromDescription()
        {
            // Arrange: put markup into the description of the first sample item
            var itemElement = XDocumentFactory.CreateSampleFeed().Descendants("item").First();
            itemElement.Element("description").Value = "some element <b>no html</b>";

            // Act
            var feedItem = new RssFeedItem(itemElement);

            // Assert
            Assert.AreEqual("some element no html", feedItem.Description);
        }
Example #10
0
    /// <summary>
    /// Parses the downloaded file for <paramref name="item"/>: collects the Open Graph
    /// and HTML attributes, resolves host and site name, then runs the tag parser
    /// selected for the site and stores its output under the "ParserResult" HTML attribute.
    /// Returns without parsing when the file does not exist or has a .png/.jpg/.gif/.pdf extension.
    /// </summary>
    /// <param name="item">Feed item whose <c>FeedAttributes.FileName</c> points at the downloaded file.</param>
    public void Parse(RssFeedItem item)
    {
        var fileName = item.FeedAttributes.FileName;

        // Article failed to download for some reason, skip over meta data processing
        if (!File.Exists(fileName))
        {
            Log.Debug("No file to parse, skipping metadata values for '{url}'", item.FeedAttributes.Url);
            return;
        }

        // Graphics file or PDF won't have og tags. File extensions are identifiers,
        // not linguistic text, so compare them ordinally rather than by culture.
        if (fileName.EndsWith(".png", System.StringComparison.Ordinal) ||
            fileName.EndsWith(".jpg", System.StringComparison.Ordinal) ||
            fileName.EndsWith(".gif", System.StringComparison.Ordinal) ||
            fileName.EndsWith(".pdf", System.StringComparison.Ordinal))
        {
            Log.Information("Binary file detected, skipping metadata values for '{url}'", item.FeedAttributes.Url);
            return;
        }

        Log.Debug("Parsing meta tags from file '{fileName}'", fileName);

        var doc = new HtmlDocument();

        doc.Load(fileName);

        // Parse the meta data from the raw HTML document
        item.OpenGraphAttributes.Add(ParseOpenGraphAttributes(doc));
        item.HtmlAttributes.Add(ParseHtmlAttributes(doc));
        item.HostName = GetHostName(item);
        item.SiteName = GetSiteName(item);

        // Check if we have a site parser defined for the site name
        var definition = _definitionFactory.Get(item.SiteName);

        using (LogContext.PushProperty("siteName", item.SiteName))
        {
            // Determine the named parser to use along with article and para selectors
            (string namedParser, string articleSelector, string paragraphSelector) = GetRouteMatchedTagParser(definition, GetRouteOnly(item));

            // Resolve the named parameter using DI
            var parser = _container.ResolveNamed <ITagParser>(namedParser);
            parser.Initialize(doc.Text, item);

            // Parse the content to get the article text
            parser.PreParse();
            item.HtmlAttributes.Add("ParserResult", parser.ParseTagsBySelector(articleSelector, paragraphSelector));
            parser.PostParse();
        }
    }
Example #11
0
    /// <summary>
    /// Picks the reference URL for an item: the Open Graph "og:url" value when it
    /// starts with "http", otherwise the crawled "Url" HTML attribute, otherwise the
    /// URL detected in the feed.
    /// </summary>
    /// <param name="item">Feed item whose attributes are consulted.</param>
    /// <returns>The first usable URL in the order described above.</returns>
    protected virtual string GetCanonicalUrl(RssFeedItem item)
    {
        // The Open Graph tag is usually the best reference, but it is NOT always a
        // full canonical URL (looking at you, frontpagemag.com)
        string openGraphUrl = item.OpenGraphAttributes.GetValueOrDefault("og:url") ?? "";

        if (openGraphUrl.StartsWith("http"))
        {
            return openGraphUrl;
        }

        // No protocol on the Open Graph value: use the URL we crawled and, when
        // that is missing as well, the URL we detected in the feed
        string crawledUrl = item.HtmlAttributes.GetValueOrDefault("Url");

        return crawledUrl ?? item.FeedAttributes.Url;
    }
Example #12
0
            /// <summary>
            /// Builds the release info from a parsed feed item. The magnet link and
            /// publish date are only assigned when the feed values parse.
            /// </summary>
            /// <param name="feedItem">Feed item supplying the raw values.</param>
            public EraiRawsReleaseInfo(RssFeedItem feedItem)
            {
                Title       = StripTitle(feedItem.Title);
                Quality     = feedItem.Quality;
                Size        = ReleaseInfo.GetBytes(feedItem.Size);
                DetailsLink = ParseDetailsLink(feedItem.Description);

                // Only an absolute URI is accepted as the magnet link
                Uri parsedLink;
                bool hasLink = Uri.TryCreate(feedItem.Link, UriKind.Absolute, out parsedLink);
                if (hasLink)
                {
                    MagnetLink = parsedLink;
                }

                // Leave PublishDate untouched when the feed value cannot be parsed
                DateTimeOffset parsedDate;
                bool hasDate = DateTimeOffset.TryParse(feedItem.PublishDate, out parsedDate);
                if (hasDate)
                {
                    PublishDate = parsedDate;
                }
            }
Example #13
0
            /// <summary>
            /// Builds the release info from a parsed feed item: quality and title come
            /// from splitting the feed title; the link and publish date are only
            /// assigned when the feed values parse.
            /// </summary>
            /// <param name="feedItem">Feed item supplying the raw values.</param>
            public EraiRawsReleaseInfo(RssFeedItem feedItem)
            {
                var titleParts = SplitQualityAndTitle(feedItem.Title);

                Quality = titleParts.quality;
                Title   = titleParts.title;

                // Only an absolute URI is accepted as the link
                Uri parsedLink;
                bool hasLink = Uri.TryCreate(feedItem.Link, UriKind.Absolute, out parsedLink);
                if (hasLink)
                {
                    Link = parsedLink;
                }

                // Leave PublishDate untouched when the feed value cannot be parsed
                DateTimeOffset parsedDate;
                bool hasDate = DateTimeOffset.TryParse(feedItem.PublishDate, out parsedDate);
                if (hasDate)
                {
                    PublishDate = parsedDate;
                }
            }
Example #14
0
        /// <summary>
        /// Updates the name, feed URL and site URL of the stored feed with the given id.
        /// </summary>
        /// <param name="id">Key of the stored feed to update.</param>
        /// <param name="item">Request body carrying the new values.</param>
        /// <returns>
        /// 400 when the request body is missing, 404 when no feed has the given id,
        /// otherwise 204 after the changes are saved.
        /// </returns>
        public IActionResult Put(long id, [FromBody] RssFeedItem item)
        {
            // A missing or unbindable body arrives as null; reject it instead of
            // failing with a NullReferenceException below
            if (item == null)
            {
                return(BadRequest());
            }

            var feed = _context.RssFeedItems.Find(id);

            if (feed == null)
            {
                return(NotFound());
            }

            feed.Name    = item.Name;
            feed.FeedUrl = item.FeedUrl;
            feed.SiteUrl = item.SiteUrl;

            _context.RssFeedItems.Update(feed);
            _context.SaveChanges();
            return(NoContent());
        }
Example #15
0
            /// <summary>
            /// Tries to build a feed item from the title, link and pubDate children of an RSS item node.
            /// </summary>
            /// <param name="rssItem">The RSS item node to read.</param>
            /// <param name="item">The parsed item, or null when any of the three values is missing or blank.</param>
            /// <returns><c>true</c> when all three values were present; otherwise <c>false</c>.</returns>
            public static bool TryParse(XmlNode rssItem, out RssFeedItem item)
            {
                var title       = rssItem.SelectSingleNode("title")?.InnerText;
                var link        = rssItem.SelectSingleNode("link")?.InnerText;
                var publishDate = rssItem.SelectSingleNode("pubDate")?.InnerText;

                // Every one of the three properties is required
                bool isComplete = !string.IsNullOrWhiteSpace(title) &&
                                  !string.IsNullOrWhiteSpace(link) &&
                                  !string.IsNullOrWhiteSpace(publishDate);

                item = isComplete ? new RssFeedItem(title, link, publishDate) : null;

                return isComplete;
            }
Example #16
0
    /// <summary>
    /// Resolves the lower-cased host name for an item. Tries the Open Graph "og:url"
    /// value first, then the crawled "Url" HTML attribute (or the feed URL), and
    /// finally asks <c>_webUtils.RepairUrl</c> to repair a value that still does not
    /// start with "http".
    /// </summary>
    /// <param name="item">Feed item whose attributes are consulted.</param>
    /// <returns>The host component of the resolved URL, lower-cased.</returns>
    private string GetHostName(RssFeedItem item)
    {
        string candidate = item.OpenGraphAttributes.GetValueOrDefault("og:url") ?? "";

        // Open Graph value has no protocol: use the crawled URL or the feed URL
        if (!candidate.StartsWith("http"))
        {
            candidate = item.HtmlAttributes.GetValueOrDefault("Url") ?? item.FeedAttributes.Url;
        }

        // Still no protocol: repair it against the feed URL
        if (!candidate.StartsWith("http"))
        {
            candidate = _webUtils.RepairUrl(candidate, item.FeedAttributes.Url);
        }

        string host = new Uri(candidate).GetComponents(UriComponents.Host, UriFormat.Unescaped);

        return host.ToLower();
    }
Example #17
0
            /// <summary>
            /// Builds a feed item from an RSS item node, reading the standard children
            /// plus the "erai:size" and "erai:res" children resolved through <paramref name="nsm"/>.
            /// </summary>
            /// <param name="nsm">Namespace manager used to resolve the "erai" prefix.</param>
            /// <param name="rssItem">The RSS item node to read.</param>
            /// <param name="item">The populated item; it is assigned even when validation fails.</param>
            /// <returns>The result of <c>item.IsValid()</c>.</returns>
            public static bool TryParse(XmlNamespaceManager nsm, XmlNode rssItem, out RssFeedItem item)
            {
                // Missing children simply yield null property values
                item = new RssFeedItem
                {
                    Title       = rssItem.SelectSingleNode("title")?.InnerText,
                    Link        = rssItem.SelectSingleNode("link")?.InnerText,
                    PublishDate = rssItem.SelectSingleNode("pubDate")?.InnerText,
                    Size        = rssItem.SelectSingleNode("erai:size", nsm)?.InnerText,
                    Description = rssItem.SelectSingleNode("description")?.InnerText,
                    Quality     = rssItem.SelectSingleNode("erai:res", nsm)?.InnerText
                };

                bool isValid = item.IsValid();

                return isValid;
            }
Example #18
0
        /// <summary>
        /// Downloads the RSS feed at <c>RssFeedUri</c> and converts each <c>item</c>
        /// element into a <c>RssFeedItem</c>, resolving the "erai" namespace for the
        /// feed's custom elements. Elements that fail to parse are logged as warnings
        /// and left out of the result.
        /// </summary>
        /// <returns>The feed items that parsed successfully.</returns>
        private async Task <IEnumerable <RssFeedItem> > GetItemsFromFeed()
        {
            // Fetch the feed
            var response = await RequestWithCookiesAndRetryAsync(RssFeedUri);

            if (response.IsRedirect)
            {
                await FollowIfRedirect(response);
            }

            // Load the response body as XML
            var feedDocument = new XmlDocument();

            feedDocument.LoadXml(response.ContentString);

            // Register the prefix used by the feed's custom elements
            var namespaces = new XmlNamespaceManager(feedDocument.NameTable);

            namespaces.AddNamespace("erai", "https://www.erai-raws.info/rss-page/");

            // Convert every <item> element
            var parsedItems = new List <RssFeedItem>();

            foreach (XmlNode itemNode in feedDocument.GetElementsByTagName("item"))
            {
                RssFeedItem parsed;

                if (!RssFeedItem.TryParse(namespaces, itemNode, out parsed))
                {
                    logger.Warn($"Could not parse {DisplayName} RSS item '{itemNode.OuterXml}'");
                    continue;
                }

                parsedItems.Add(parsed);
            }

            return parsedItems;
        }
Example #19
0
    /// <summary>
    /// Fills the export item's URL, article text, subtitle, image URL, site name and
    /// host name from the crawled item's Open Graph and HTML attributes, then applies
    /// the site-specific fix-ups below.
    /// </summary>
    /// <param name="exportFeedItem">Export item to populate.</param>
    /// <param name="item">Crawled feed item supplying the attributes.</param>
    /// <param name="hostName">Host name to store and to fall back on when no site name is available.</param>
    protected virtual void SetExtendedArticleMetaData(ExportFeedItem exportFeedItem, RssFeedItem item, string hostName)
    {
        // Prefer the Open Graph URL helpfully provided with almost every article
        exportFeedItem.Url = item.OpenGraphAttributes.GetValueOrDefault("og:url") ?? "";

        // Make sure the Url is complete
        if (!exportFeedItem.Url.StartsWith("http"))
        {
            exportFeedItem.Url = item.HtmlAttributes.GetValueOrDefault("Url") ?? item.FeedAttributes.Url;
        }

        // Extract the meta data from the parser result and the Open Graph tags;
        // subtitle and image URL stay null when the tag is absent
        exportFeedItem.ArticleText = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
        exportFeedItem.Subtitle    = item.OpenGraphAttributes.GetValueOrDefault("og:title");
        exportFeedItem.ImageUrl    = item.OpenGraphAttributes.GetValueOrDefault("og:image");
        exportFeedItem.SiteName    = item.OpenGraphAttributes.GetValueOrDefault("og:site_name")?.ToLower() ?? "";
        exportFeedItem.HostName    = hostName;

        // Fixup apnews on populist press links which sometimes report incorrectly
        if (string.IsNullOrWhiteSpace(exportFeedItem.SiteName) || (exportFeedItem.SiteName == "ap news" && exportFeedItem.Url.Contains("populist.press")))
        {
            exportFeedItem.SiteName = exportFeedItem.HostName;
        }

        // Fixup news.trust.org imageUrl links which have an embedded redirect
        if (string.IsNullOrWhiteSpace(exportFeedItem.ImageUrl) || (exportFeedItem.SiteName == "news.trust.org" && exportFeedItem.Url.Contains("news.trust.org")))
        {
            exportFeedItem.ImageUrl = null;
        }

        // Remove the protocol portion if there is one, i.e. 'https://'
        if (exportFeedItem.SiteName.IndexOf('/') > 0)
        {
            exportFeedItem.SiteName = exportFeedItem.SiteName.Substring(exportFeedItem.SiteName.LastIndexOf('/') + 1);
        }
    }
        /// <summary>
        /// Adds the article of <paramref name="savedFeedItem"/> to the saved list unless an
        /// article with the same title is already present, in which case an alert is shown
        /// instead. The list is written to file in both cases.
        /// </summary>
        /// <param name="savedArticleList">The saved article list.</param>
        /// <param name="savedFeedItem">The saved feed item.</param>
        public void AddArticle(ObservableCollection <SavedArticle> savedArticleList, RssFeedItem savedFeedItem)
        {
            string title = savedFeedItem.Item.Title.Text;

            // Any() is already false for an empty list, so no separate emptiness check is needed
            bool alreadySaved = savedArticleList.Any(c => c.Title == title);

            if (alreadySaved)
            {
                MessageBox.Show(Resources.ERROR_SAVING_ARTICLE_MESSAGE, Resources.MESSAGEBOX_ALERT, MessageBoxButton.OK, MessageBoxImage.Information);
            }
            else
            {
                savedArticleList.Add(new SavedArticle(title, savedFeedItem.Item.Links[0].Uri));
            }

            _saveUtility.SaveArticlesToFile(savedArticleList);
        }
        /// <summary>
        /// Loads the RSS 1.0 (RDF) document at <paramref name="url"/> and converts every
        /// <c>rss:item</c> element into a <c>RssFeedItem</c>.
        /// </summary>
        /// <param name="url">Location of the feed document.</param>
        /// <returns>
        /// One item per <c>rss:item</c> element. Missing title, description or link become
        /// empty strings; a missing or unparsable <c>dc:date</c> becomes <c>DateTime.MinValue</c>.
        /// </returns>
        public static List<RssFeedItem> GetItemFeed(string url)
        {
            List<RssFeedItem> feed = new List<RssFeedItem>();

            XmlDocument doc = new XmlDocument();
            XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
            nsmgr.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
            nsmgr.AddNamespace("rss", "http://purl.org/rss/1.0/");
            nsmgr.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

            // Dispose the reader as soon as the document is loaded so the underlying
            // stream is released even when loading throws.
            // NOTE(review): the reader is created with its default settings; confirm that
            // DTD processing is acceptable for the feeds this is pointed at.
            using (XmlTextReader reader = new XmlTextReader(url))
            {
                doc.Load(reader);
            }

            XmlNodeList nodes = doc.SelectNodes("/rdf:RDF//rss:item", nsmgr);

            foreach (XmlNode node in nodes)
            {
                XmlNodeList titleNode = node.SelectNodes("rss:title", nsmgr);
                XmlNodeList descNode = node.SelectNodes("rss:description", nsmgr);
                XmlNodeList linkNode = node.SelectNodes("rss:link", nsmgr);
                XmlNodeList dateNode = node.SelectNodes("dc:date", nsmgr);

                string title = titleNode.Count == 0 ? "" : titleNode[0].InnerText;
                string desc = descNode.Count == 0 ? "" : descNode[0].InnerText;
                string link = linkNode.Count == 0 ? "" : linkNode[0].InnerText;
                string date = dateNode.Count == 0 ? "" : dateNode[0].InnerText;

                // A missing date is represented as "" above, which DateTime.Parse would
                // reject with a FormatException; fall back to MinValue instead.
                DateTime publishDate;
                if (!DateTime.TryParse(date, out publishDate))
                {
                    publishDate = DateTime.MinValue;
                }

                RssFeedItem item = new RssFeedItem()
                {
                    Description = desc,
                    Title = title,
                    Link = link,
                    PublishDate = publishDate
                };
                feed.Add(item);
            }
            return feed;
        }
        /// <summary>
        /// Parses a single RSS <c>item</c> element from the reader into a feed item.
        /// The reader is advanced to the first descendant named "item", every element
        /// read after that is dispatched by name to the matching property, and the
        /// reader is closed before returning.
        /// </summary>
        /// <param name="reader">The reader to consume; it is closed by this method.</param>
        /// <returns>The populated feed item.</returns>
        public IFeedItem ParseItem(XmlReader reader)
        {
            IRssFeedItem item = new RssFeedItem();

            // The return value is not checked, so a missing "item" element is not reported here
            reader.ReadToDescendant("item");

            // When true, the previous iteration already consumed content, so the loop
            // condition short-circuits and does NOT call Read() again before the current
            // node is inspected.
            bool readContent = false;

            while (readContent || reader.Read())
            {
                readContent = false;
                if (reader.NodeType == XmlNodeType.Element)
                {
                    // Default for element handlers: skip the next Read(). Presumably this is
                    // because ReadElementContentAsString leaves the reader on the node that
                    // follows the element — confirm against the XmlReader documentation.
                    readContent = true;
                    switch (reader.Name)
                    {
                    case "author":
                        item.Author = reader.ReadElementContentAsString();
                        break;

                    case "category":
                        using (XmlReader subReader = reader.ReadSubtree())
                        {
                            item.Category = ConvertToIRssCategory(subReader);
                        }
                        // On an empty element, force a Read() on the next iteration so the
                        // loop moves past it (the same pattern is repeated for the other
                        // sub-tree handlers below)
                        if (reader.IsEmptyElement)
                        {
                            readContent = false;
                        }
                        break;

                    case "comments":
                        item.Comments = CachedPropertiesProvider.ConvertToUri(reader.ReadElementContentAsString());
                        break;

                    case "description":
                        item.Description = reader.ReadElementContentAsString();
                        break;

                    case "enclosure":
                        using (XmlReader subReader = reader.ReadSubtree())
                        {
                            item.Enclosure = ConvertToIRssEnclosure(subReader);
                        }
                        if (reader.IsEmptyElement)
                        {
                            readContent = false;
                        }
                        break;

                    case "guid":
                        using (XmlReader subReader = reader.ReadSubtree())
                        {
                            item.Guid = ConvertToIRssGuid(subReader);
                        }
                        if (reader.IsEmptyElement)
                        {
                            readContent = false;
                        }
                        break;

                    case "link":
                        item.Link = CachedPropertiesProvider.ConvertToUri(reader.ReadElementContentAsString());
                        break;

                    case "pubDate":
                        item.PublicationDate = CachedPropertiesProvider.ConvertToTzDateTime(reader.ReadElementContentAsString());
                        break;

                    case "source":
                        using (XmlReader subReader = reader.ReadSubtree())
                        {
                            item.Source = ConvertToIRssSource(subReader);
                        }
                        if (reader.IsEmptyElement)
                        {
                            readContent = false;
                        }
                        break;

                    case "title":
                        item.Title = reader.ReadElementContentAsString();
                        break;

                    default:
                        // Any element not listed above is handed to UnhandledElement;
                        // NOTE(review): readContent stays true here, so that helper is
                        // presumably expected to advance the reader — confirm.
                        UnhandledElement(item, reader);
                        break;
                    }
                }
            }
            // The reader passed in by the caller is closed here and is not usable afterwards
            reader.Close();
            return(item);
        }
Example #23
0
 /// <summary>
 /// Stores the source HTML and the feed item in the parser's fields.
 /// </summary>
 /// <param name="sourceHtml">Raw HTML of the document.</param>
 /// <param name="item">Feed item the HTML belongs to.</param>
 public void Initialize(string sourceHtml, RssFeedItem item)
 {
     this._item       = item;
     this._sourceHtml = sourceHtml;
 }
Example #24
0
 /// <summary>
 /// Populates the export item for a graphic: the feed URL becomes the image URL and
 /// the item's host name is used for both host name and site name.
 /// </summary>
 /// <param name="item">Crawled feed item supplying the values.</param>
 /// <param name="exportFeedItem">Export item to populate.</param>
 protected virtual void SetGraphicMetaData(RssFeedItem item, ExportFeedItem exportFeedItem)
 {
     exportFeedItem.ImageUrl = item.FeedAttributes.Url;

     // The item carries no separate site name here, so the host doubles as one
     var host = item.HostName;

     exportFeedItem.HostName = host;
     exportFeedItem.SiteName = host;
 }
Example #25
0
    /// <summary>
    /// Populates the video-related fields of the export item. Subtitle, image and site
    /// name come from the Open Graph tags; the video URL and dimensions come from the
    /// rumble ld+json parser result, the bitchute parser result, or the Open Graph video
    /// tags, depending on <c>item.SiteName</c>. The article text is set to the description
    /// wrapped in a paragraph.
    /// </summary>
    /// <param name="exportFeedItem">Export item to populate.</param>
    /// <param name="item">Crawled feed item supplying the attributes.</param>
    /// <param name="hostName">Host name to store and to fall back on when no site name is available.</param>
    protected virtual void SetVideoMetaData(ExportFeedItem exportFeedItem, RssFeedItem item, string hostName)
    {
        // Extract the meta data from the Open Graph tags
        exportFeedItem.Subtitle = item.OpenGraphAttributes.GetValueOrDefault("og:title") ?? "";
        exportFeedItem.ImageUrl = item.OpenGraphAttributes.GetValueOrDefault("og:image") ?? "";
        exportFeedItem.SiteName = item.OpenGraphAttributes.GetValueOrDefault("og:site_name")?.ToLower() ?? "";
        exportFeedItem.HostName = hostName;
        var description = item.OpenGraphAttributes.GetValueOrDefault("og:description") ?? "";

        if (string.IsNullOrWhiteSpace(exportFeedItem.SiteName))
        {
            exportFeedItem.SiteName = hostName;
        }

        if (item.SiteName == "rumble")
        {
            var text = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
            if (!text.StartsWith("<"))
            {
                Log.Debug("EXPORT: Processing rumble.com ld+json metadata");

                // application/ld+json parser result. DeserializeObject can return null
                // (e.g. for an empty or literal-null payload), so substitute an empty list
                // rather than enumerating null.
                var list = JsonConvert.DeserializeObject <List <JsonLdRumbleValues> >(text) ?? new List <JsonLdRumbleValues>();
                foreach (var value in list)
                {
                    if (value == null || string.IsNullOrWhiteSpace(value.embedUrl))
                    {
                        continue;
                    }

                    exportFeedItem.VideoUrl    = value.embedUrl;
                    exportFeedItem.VideoHeight = int.TryParse(Convert.ToString(value.height), out int height) ? height : 0;
                    exportFeedItem.VideoWidth  = int.TryParse(Convert.ToString(value.width), out int width) ? width : 0;
                    break;
                }
            }
        }
        else if (item.SiteName == "bitchute")
        {
            Log.Information("EXPORT: Processing bitchute.com metadata");

            // Bitchute logic is a little convoluted, they don't provide much metadata:
            // the video URL is the text from the first "https://" up to the next quote
            var result = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
            var start  = result.IndexOf("https://", StringComparison.Ordinal);
            var end    = start >= 0 ? result.IndexOf('\"', start) : -1;

            if (end > start)
            {
                exportFeedItem.VideoUrl    = result.Substring(start, end - start);
                exportFeedItem.VideoHeight = 1080;
                exportFeedItem.VideoWidth  = 1920;
            }
            else
            {
                // No URL (or no closing quote) in the parser result: leave the dimensions
                // untouched instead of throwing from IndexOf/Substring
                Log.Warning("EXPORT: Unable to locate bitchute.com video URL in parser result");
                exportFeedItem.VideoUrl = "";
            }
        }
        else
        {
            Log.Debug("EXPORT: Processing open graph video metadata");

            // Sites that provide video metadata via open graph tags
            exportFeedItem.VideoUrl = item.OpenGraphAttributes.GetValueOrDefault("og:video:secure_url") ??
                                      item.OpenGraphAttributes.GetValueOrDefault("og:video:url") ??
                                      item.OpenGraphAttributes.GetValueOrDefault("og:video") ??
                                      item.OpenGraphAttributes.GetValueOrDefault("og:x:video") ??
                                      "";
            exportFeedItem.VideoHeight = int.TryParse(item.OpenGraphAttributes.GetValueOrDefault("og:video:height") ??
                                                      item.OpenGraphAttributes.GetValueOrDefault("og:x:video:height") ??
                                                      item.OpenGraphAttributes.GetValueOrDefault("og:image:height"), out int height) ? height : 0;
            exportFeedItem.VideoWidth = int.TryParse(item.OpenGraphAttributes.GetValueOrDefault("og:video:width") ??
                                                     item.OpenGraphAttributes.GetValueOrDefault("og:x:video:width") ??
                                                     item.OpenGraphAttributes.GetValueOrDefault("og:image:width"), out int width) ? width : 0;

            // Prefer the parser result over the Open Graph description when there is one
            string result = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
            if (!string.IsNullOrEmpty(result))
            {
                description = result;
            }
        }

        using (LogContext.PushProperty("hostName", hostName))
        {
            Log.Information("Video URL: '{url}' ({height}x{width})", exportFeedItem.VideoUrl, exportFeedItem.VideoHeight, exportFeedItem.VideoWidth);
        }

        // There's no article text for most video sites, so just use the meta description
        exportFeedItem.ArticleText = $"<p>{description}</p>";
    }
Example #26
0
 /// <summary>
 /// Tells whether two feed items have equal <c>Guid</c> values.
 /// </summary>
 /// <param name="left">First item.</param>
 /// <param name="right">Second item.</param>
 /// <returns><c>true</c> when both Guid values compare equal.</returns>
 public static bool IsSameFeedItem(this RssFeedItem left, RssFeedItem right)
 {
     bool sameGuid = left.Guid == right.Guid;

     return sameGuid;
 }
Example #27
0
 /// <summary>
 /// Populates the export item with the host name as both host and site name, plus
 /// a fixed placeholder article text.
 /// </summary>
 /// <param name="exportFeedItem">Export item to populate.</param>
 /// <param name="item">Crawled feed item (not read by this implementation).</param>
 /// <param name="hostName">Host name used for both host and site name.</param>
 protected virtual void SetBasicArticleMetaData(ExportFeedItem exportFeedItem, RssFeedItem item, string hostName)
 {
     // Placeholder text pointing the reader at the original link
     const string placeholderText = "<p>Unable to crawl article content. Click the link below to view in your browser.</p>";

     exportFeedItem.HostName    = hostName;
     exportFeedItem.SiteName    = hostName;
     exportFeedItem.ArticleText = placeholderText;
 }
Example #28
0
 /// <summary>
 /// Creates the item in the container, using the item's Id as the partition key.
 /// </summary>
 /// <param name="item">The item to create.</param>
 public async Task AddItemAsync(RssFeedItem item)
 {
     var partitionKey = new PartitionKey(item.Id);

     await _container.CreateItemAsync <RssFeedItem>(item, partitionKey);
 }
Example #29
0
 /// <summary>
 /// Upserts the item into the container, using <paramref name="id"/> as the partition key.
 /// </summary>
 /// <param name="id">Value used as the partition key.</param>
 /// <param name="item">The item to upsert.</param>
 public async Task UpdateItemAsync(string id, RssFeedItem item)
 {
     var partitionKey = new PartitionKey(id);

     await _container.UpsertItemAsync <RssFeedItem>(item, partitionKey);
 }
 /// <summary>
 /// Copies the item's title and description into the cell's Title and Description controls.
 /// </summary>
 /// <param name="item">Feed item to display.</param>
 public void UpdateCell(RssFeedItem item)
 {
     var titleText = item.Title;
     Title.Text = titleText;

     var descriptionText = item.Description;
     Description.Text = descriptionText;
 }
Example #31
0
 /// <summary>
 /// Stores a new feed item and responds with its location.
 /// </summary>
 /// <param name="item">Request body carrying the item to store.</param>
 /// <returns>
 /// 400 when the request body is missing; otherwise 201 pointing at the "Get" route
 /// for the new item's id.
 /// </returns>
 public IActionResult Post([FromBody] RssFeedItem item)
 {
     // A missing or unbindable body arrives as null; reject it instead of
     // failing further down with an exception
     if (item == null)
     {
         return(BadRequest());
     }

     _context.RssFeedItems.Add(item);
     _context.SaveChanges();
     return(CreatedAtRoute("Get", new { id = item.Id }, item));
 }
Example #32
0
    /// <summary>
    /// Builds an <see cref="ExportFeedItem"/> from a crawled feed item and selects the export
    /// template (graphic, MP4 video, HTML video, basic or extended) used for its article text.
    /// </summary>
    /// <param name="item">Crawled item supplying the feed, OpenGraph and HTML attributes.</param>
    /// <param name="feed">Feed handed through to the template step.</param>
    /// <returns>The populated export item with <c>ArticleText</c> set.</returns>
    public ExportFeedItem FormatItem(RssFeedItem item, RssFeed feed)
    {
        // The UrlHash is a hash of the feed source, not the ultimate target URL. This is to avoid
        // over-crawling with link shortening services such as bit.ly and t.co. Once we detect a hash
        // has been crawled from the source, there is no need to crawl again. It means the hash does
        // not truly reflect the target URL, but that's ok as there are duplicate crawls across the
        // different feeds anyway.
        var exportFeedItem = new ExportFeedItem
        {
            Id           = Guid.NewGuid().ToString(),
            FeedId       = item.FeedAttributes.FeedId,
            Url          = GetCanonicalUrl(item),
            UrlHash      = item.FeedAttributes.UrlHash,
            DateAdded    = item.FeedAttributes.DateAdded,
            LinkLocation = item.FeedAttributes.LinkLocation,
            Title        = item.FeedAttributes.Title
        };

        Uri uri = new Uri(exportFeedItem.Url);

        // Host names are machine data: lower-case them with the invariant culture so the
        // result does not depend on the thread's current culture (e.g. Turkish 'I' handling).
        string hostName = uri.GetComponents(UriComponents.Host, UriFormat.Unescaped).ToLowerInvariant();

        var fileName = item.FeedAttributes.FileName ?? "";

        // Ordinal suffix checks: file extensions are not linguistic text, and the culture-sensitive
        // default of EndsWith(string) can vary by culture. Matching stays case-sensitive as before.
        bool isGraphicFile = fileName.EndsWith(".png", StringComparison.Ordinal) ||
                             fileName.EndsWith(".jpg", StringComparison.Ordinal) ||
                             fileName.EndsWith(".gif", StringComparison.Ordinal) ||
                             fileName.EndsWith(".pdf", StringComparison.Ordinal);

        if (isGraphicFile)
        {
            SetGraphicMetaData(item, exportFeedItem);
            exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.GraphicTemplate);
            return exportFeedItem;
        }

        // First OpenGraph video URL found wins; empty string when none is present.
        string videoUrl = item.OpenGraphAttributes.GetValueOrDefault("og:video:secure_url") ??
                          item.OpenGraphAttributes.GetValueOrDefault("og:video:url") ??
                          item.OpenGraphAttributes.GetValueOrDefault("og:video") ??
                          item.OpenGraphAttributes.GetValueOrDefault("og:x:video") ??
                          "";

        // Some sites do not provide OpenGraph video tags so watch for those specifically
        string videoType = item.OpenGraphAttributes.GetValueOrDefault("og:video:type") ??
                           item.OpenGraphAttributes.GetValueOrDefault("og:x:video:type") ??
                           (videoUrl.EndsWith(".mp4", StringComparison.Ordinal) || item.SiteName == "bitchute" ? "video/mp4" :
                            videoUrl.Contains("youtube.com") || item.SiteName == "rumble" ? "text/html" : "");

        // A video needs either a URL or one of the specially handled sites, plus a type we can render.
        bool hasSupportedVideoFormat = (videoUrl.Length > 0 || item.SiteName == "rumble" || item.SiteName == "bitchute") &&
                                       (videoType == "text/html" || videoType == "video/mp4" || videoType == "application/x-mpegURL");

        if (hasSupportedVideoFormat)
        {
            Log.Debug("Applying video metadata values for '{hostname}'", hostName);
            SetVideoMetaData(exportFeedItem, item, hostName);
            if (exportFeedItem.VideoHeight > 0)
            {
                if (videoType == "video/mp4" || videoType == "application/x-mpegURL")
                {
                    exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.Mp4VideoTemplate);
                }
                else
                {
                    exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.HtmlVideoTemplate);
                }
            }
            else
            {
                // No positive video height after the metadata step: use the extended template instead.
                exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.ExtendedTemplate);
            }
        }
        else
        {
            var result = item.HtmlAttributes.GetValueOrDefault("ParserResult") ?? "";
            if (string.IsNullOrEmpty(result))
            {
                Log.Debug("No parsed result, applying basic metadata values for '{hostname}'", hostName);

                // Article failed to download, display minimal basic meta data
                SetBasicArticleMetaData(exportFeedItem, item, hostName);
                exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.BasicTemplate);
            }
            else
            {
                Log.Debug("Applying extended metadata values for '{hostname}'", hostName);

                SetExtendedArticleMetaData(exportFeedItem, item, hostName);
                exportFeedItem.ArticleText = ApplyTemplateToDescription(exportFeedItem, feed, ExportTemplates.ExtendedTemplate);
            }
        }

        return exportFeedItem;
    }
Example #33
0
 /// <summary>
 /// Forwards the saved-article collection and the feed item to
 /// <c>AddArticle</c> on the saved feed item manager.
 /// </summary>
 /// <param name="savedArticleList">Collection of saved articles handed to <c>AddArticle</c>.</param>
 /// <param name="feedItem">Feed item handed to <c>AddArticle</c>.</param>
 public void SaveArticles(ObservableCollection<SavedArticle> savedArticleList, RssFeedItem feedItem)
     => _savedFeedItemManager.AddArticle(savedArticleList, feedItem);
Example #34
0
        /// <summary>
        /// Delivers one feed item: as an HTML e-mail when e-mail notification is switched on in
        /// <c>Configuration</c>, otherwise as a Markdown chat message to the user's chat.
        /// </summary>
        /// <param name="user">Recipient; its <c>ChatId</c> is read for chat delivery only.</param>
        /// <param name="item">The feed item to deliver.</param>
        /// <param name="isMuted">
        /// When true the chat message is sent with <c>disable_notification</c> set to true;
        /// otherwise that argument is passed as null.
        /// </param>
        /// <returns>A task that completes once the awaited send call has finished.</returns>
        private async Task SendRssItem(UserDetails user, RssFeedItem item, bool isMuted)
        {
            const string ReutersLink = "http://feeds.reuters.com/";

            if (Configuration.USE_EMAIL_NOTIFICATION)
            {
                // Shorten well-known feed hosts to a friendlier name for the subject line.
                var origin =
                    item.Origin
                    .Replace("meduza.io", "meduza")
                    .Replace("feeds.reuters.com", "reuters")
                    .Replace("feeds.feedburner.com", "feedburner")
                    .Replace("www.rte.ie", "RTE")
                    .Replace("www.xkcd.com", "xkcd");

                // Title and link come from the feed and are placed into HTML text / attribute
                // positions, so encode them. The description is inserted unchanged.
                // NOTE(review): presumably the description is feed-provided HTML markup - confirm.
                var encodedTitle = System.Net.WebUtility.HtmlEncode(item.Title);
                var encodedLink  = System.Net.WebUtility.HtmlEncode(item.Link);

                var message = $@"
<html>
<header></header>
<body>
<a href=""{encodedLink}""><h2>{encodedTitle}</h2></a>
<b>Publication date: {item.PublicationDate:yyyy-MMM-dd HH:mm}</b>
<br/>
<br/>
{item.Description}
<br/>
<br/>
<a href=""{encodedLink}"">Read More</a>
</body>
</html>
";

                // The subject is plain text, so the raw title is used there.
                await MailClient.SendMail(
                    Configuration.NOTIFICATION_EMAIL_DST,
                    $"{origin}: {item.Title}",
                    body: message,
                    isHtmlFormatted: true);
                return;
            }

            // Ordinal, case-insensitive search: URLs are not linguistic text, and this avoids
            // allocating a lower-cased copy of the link with culture-dependent casing rules.
            bool isReutersItem = item.Link.IndexOf(ReutersLink, System.StringComparison.OrdinalIgnoreCase) >= 0;

            string chatMessage;
            if (isReutersItem)
            {
                // Reduced mode for Reuters links: send the title plus at most the first 200
                // characters of the description, and switch the web page preview off.
                var text = item.Description.Length > 200
                    ? item.Description.Substring(0, 200)
                    : item.Description;
                chatMessage = $"*{item.Title}*\n\n_{text}_\n\n[Read More]({item.Link})";
            }
            else
            {
                // Every other source: a single Markdown link, sent with the web page preview enabled.
                chatMessage = $"[{item.Title}]({item.Link})";
            }

            await API.SendMessage(
                user.ChatId.ToString(),
                chatMessage,
                disable_notification: isMuted ? (bool?)true : null,
                disable_web_page_preview: isReutersItem,
                parse_mode: "Markdown");
        }