/// <summary>
/// Downloads the RSS feed of every source stored in the database and hands each
/// feed item to <c>AddNews</c>.
/// </summary>
/// <remarks>
/// Sources are processed independently: any exception raised while downloading or
/// parsing one feed is written to the console and the loop moves on to the next source.
/// <c>LastScrape</c> is stamped and saved for every source, whether or not its feed
/// could be loaded.
/// </remarks>
public void FetchAll()
{
    var sources = db.Sources.ToList();
    foreach (Source source in sources)
    {
        try
        {
            // WebClient and the response stream are both disposable; scoping them here
            // releases the connection even when parsing fails half-way through.
            using (var client = new WebClient())
            using (var stream = client.OpenRead(source.RSSurl))
            using (XmlReader reader = new SyndicationFeedXmlReader(stream))
            {
                SyndicationFeed feed = SyndicationFeed.Load(reader);
                foreach (SyndicationItem syndicationItem in feed.Items)
                {
                    AddNews(syndicationItem, source);
                }
            }
        }
        catch (Exception e)
        {
            // Best effort: one broken feed must not stop the remaining sources.
            Console.WriteLine(e);
        }

        source.LastScrape = DateTime.Now;
        db.Sources.AddOrUpdate(source);
        db.SaveChanges();
    }
}
/// <summary>
/// Reads every registered RSS feed from the database and builds a table with one row
/// per feed item: the item link, an image URL taken from the item summary, and the
/// catalogue / RSS ids of the feed the item came from.
/// </summary>
/// <returns>
/// A table with the columns <c>url</c>, <c>hinhanh</c> (image URL), <c>IdCatalogy</c>
/// and <c>IdRSS</c>.
/// </returns>
/// <remarks>
/// Items that have no link or no summary text, or that fail while being converted, are
/// skipped. A feed that cannot be downloaded or parsed is not handled here; that
/// exception propagates to the caller.
/// </remarks>
private DataTable getListRSS()
{
    // Captures the value of the src attribute of <img> tags embedded in the summary HTML.
    const string rx = @"(?<=img\s+src\=[\x27\x22])(?<Url>[^\x27\x22]*)(?=[\x27\x22])";

    DataTable dtRSS = new DataTable();
    dtRSS.Columns.Add("url");
    dtRSS.Columns.Add("hinhanh");
    dtRSS.Columns.Add("IdCatalogy");
    dtRSS.Columns.Add("IdRSS");

    // Registered feeds joined with their definitions.
    string sql = "select R.idLoai IdCatalogy,R.idRss RSS,F.Name,F.url url from RSSRigisteds R, RssFeeds F where R.idRSS = F.id ";
    DBClass _db = new DBClass();
    DataTable dt = _db.sqlGetData(sql);

    foreach (DataRow dr in dt.Rows)
    {
        string feedUrl = BaseView.GetStringFieldValue(dr, "url");
        if (String.IsNullOrEmpty(feedUrl))
        {
            continue;
        }

        // Dispose the client and the response stream so connections are not leaked per feed.
        using (var client = new WebClient())
        using (var stream = client.OpenRead(feedUrl))
        using (XmlReader reader = new SyndicationFeedXmlReader(stream))
        {
            SyndicationFeed feed = SyndicationFeed.Load(reader);
            foreach (SyndicationItem album in feed.Items)
            {
                // Items without a link or summary text cannot produce a row; skip them
                // explicitly instead of relying on an exception.
                if (album.Links.Count == 0 || album.Summary == null || album.Summary.Text == null)
                {
                    continue;
                }

                try
                {
                    DataRow row = dtRSS.NewRow();
                    row[0] = album.Links[0].Uri;

                    // When the summary contains several images the last one wins.
                    string hinhanh = "";
                    foreach (Match m in Regex.Matches(album.Summary.Text, rx, RegexOptions.IgnoreCase | RegexOptions.Multiline))
                    {
                        hinhanh = m.Groups[1].Value;
                        if (hinhanh.StartsWith("//", StringComparison.Ordinal))
                        {
                            // Protocol-relative URL (Google RSS emits these): prefix the scheme
                            // only, leaving any later "//" in the path untouched.
                            hinhanh = "http:" + hinhanh;
                        }
                    }

                    row[1] = hinhanh;
                    row[2] = BaseView.GetStringFieldValue(dr, "IdCatalogy");
                    row[3] = BaseView.GetStringFieldValue(dr, "RSS");
                    dtRSS.Rows.Add(row);
                }
                catch (Exception ex)
                {
                    // Best effort: drop the item but leave a trace of why it was dropped.
                    System.Diagnostics.Debug.WriteLine("Skipping RSS item from '" + feedUrl + "': " + ex.Message);
                }
            }
        }
    }

    return dtRSS;
}
/// <summary>
/// Loads the RSS feed of every row returned by <c>_db.get_all_LoaiTin_UrlNotNull_RV()</c>
/// and flattens the feed items into a table.
/// </summary>
/// <returns>
/// A table with the columns <c>url</c> (item link), <c>title</c>, <c>date</c>
/// (publish date), <c>Desc</c> (summary text), <c>urlRss</c> (feed URL) and
/// <c>IdCatalogy</c> (the <c>id</c> field of the source row).
/// </returns>
/// <remarks>
/// Best effort: a feed that cannot be loaded and an item that cannot be converted are
/// both skipped; the reason is written to the debug output.
/// </remarks>
private DataTable getRSS()
{
    DataTable dtRSS = new DataTable();
    dtRSS.Columns.Add("url");
    dtRSS.Columns.Add("title");
    dtRSS.Columns.Add("date");
    dtRSS.Columns.Add("Desc");
    dtRSS.Columns.Add("urlRss");
    dtRSS.Columns.Add("IdCatalogy");

    DataTable dt = _db.get_all_LoaiTin_UrlNotNull_RV();
    foreach (DataRow dr in dt.Rows)
    {
        try
        {
            string feedUrl = BaseView.GetStringFieldValue(dr, "url");
            if (String.IsNullOrEmpty(feedUrl))
            {
                continue;
            }

            // Dispose the client and the response stream so connections are not leaked per feed.
            using (var client = new WebClient())
            using (var stream = client.OpenRead(feedUrl))
            using (XmlReader reader = new SyndicationFeedXmlReader(stream))
            {
                SyndicationFeed feed = SyndicationFeed.Load(reader);
                foreach (SyndicationItem album in feed.Items)
                {
                    try
                    {
                        DataRow row = dtRSS.NewRow();
                        row[0] = album.Links[0].Uri;
                        row[1] = album.Title.Text;
                        row[2] = album.PublishDate;
                        row[3] = album.Summary.Text;
                        row[4] = BaseView.GetStringFieldValue(dr, "url");
                        row[5] = BaseView.GetStringFieldValue(dr, "id");
                        dtRSS.Rows.Add(row);
                    }
                    catch (Exception ex)
                    {
                        // Items missing a link, title or summary end up here and are dropped.
                        System.Diagnostics.Debug.WriteLine("Skipping RSS item from '" + feedUrl + "': " + ex.Message);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // One unreachable or malformed feed must not prevent the others from loading.
            System.Diagnostics.Debug.WriteLine("Skipping RSS feed: " + ex.Message);
        }
    }

    return dtRSS;
}
/// <summary>
/// Loads the RSS feed whose URL is entered in <c>txtURL</c> and builds a table with one
/// row per feed item: the item link and an image URL taken from the item summary.
/// </summary>
/// <returns>
/// A table with the columns <c>url</c> and <c>hinhanh</c> (image URL). The table is
/// empty when <c>txtURL</c> holds no text.
/// </returns>
/// <remarks>
/// Items that have no link or no summary text, or that fail while being converted, are
/// skipped. A feed that cannot be downloaded or parsed is not handled here; that
/// exception propagates to the caller.
/// </remarks>
private DataTable getListRSS()
{
    // Captures the value of the src attribute of <img> tags embedded in the summary HTML.
    const string rx = @"(?<=img\s+src\=[\x27\x22])(?<Url>[^\x27\x22]*)(?=[\x27\x22])";

    DataTable dtRSS = new DataTable();
    dtRSS.Columns.Add("url");
    dtRSS.Columns.Add("hinhanh");

    string feedUrl = txtURL.Text;
    if (String.IsNullOrEmpty(feedUrl))
    {
        return dtRSS;
    }

    // Dispose the client and the response stream so the connection is not leaked.
    using (var client = new WebClient())
    using (var stream = client.OpenRead(feedUrl))
    using (XmlReader reader = new SyndicationFeedXmlReader(stream))
    {
        SyndicationFeed feed = SyndicationFeed.Load(reader);
        foreach (SyndicationItem album in feed.Items)
        {
            // Items without a link or summary text cannot produce a row; skip them
            // explicitly instead of relying on an exception.
            if (album.Links.Count == 0 || album.Summary == null || album.Summary.Text == null)
            {
                continue;
            }

            try
            {
                DataRow row = dtRSS.NewRow();
                row[0] = album.Links[0].Uri;

                // When the summary contains several images the last one wins.
                string hinhanh = "";
                foreach (Match m in Regex.Matches(album.Summary.Text, rx, RegexOptions.IgnoreCase | RegexOptions.Multiline))
                {
                    hinhanh = m.Groups[1].Value;
                    if (hinhanh.StartsWith("//", StringComparison.Ordinal))
                    {
                        // Protocol-relative URL (Google RSS emits these): prefix the scheme
                        // only, leaving any later "//" in the path untouched.
                        hinhanh = "http:" + hinhanh;
                    }
                }

                row[1] = hinhanh;
                dtRSS.Rows.Add(row);
            }
            catch (Exception ex)
            {
                // Best effort: drop the item but leave a trace of why it was dropped.
                System.Diagnostics.Debug.WriteLine("Skipping RSS item from '" + feedUrl + "': " + ex.Message);
            }
        }
    }

    return dtRSS;
}
/// <summary>
/// Loads the syndication feed for <paramref name="feedUrl"/> from its local copy and
/// normalises the items (source feed, summary and content are filled in).
/// </summary>
/// <param name="feedUrl">
/// URL of the feed. It is mapped to a local file name through <c>UrlToLocalFileName</c>;
/// the network download itself is currently disabled in this method.
/// </param>
/// <returns>
/// The parsed feed. When no local copy exists, or the content cannot be parsed as XML,
/// the problem is written to the debug output and an empty feed is returned.
/// </returns>
public static SyndicationFeed GetFeed(string feedUrl)
{
    System.Diagnostics.Debug.Write("** Loading '" + feedUrl + "' ... ");
    SyndicationFeed f = new SyndicationFeed();

    try
    {
        XmlReaderSettings xrs = new XmlReaderSettings();
        xrs.DtdProcessing = DtdProcessing.Ignore;

        // Web requests are redirected to a local resource to avoid hammering servers
        // during debug sessions. To refresh the cache, download with
        // client.DownloadFile(feedUrl, UrlToLocalFileName(feedUrl)).
        string localFile = UrlToLocalFileName(feedUrl);

        // The client is only needed once the download above is re-enabled; it is kept
        // (and disposed) so that switching it back on is a one-line change.
        using (WebClient client = new WebClient())
        using (System.IO.MemoryStream ms = System.IO.File.Exists(localFile)
            ? new System.IO.MemoryStream(System.IO.File.ReadAllBytes(localFile))
            : new System.IO.MemoryStream()) // no local copy: empty stream, parsing fails below
        {
            try
            {
                // Attempt to load the feed using the standard XmlReader first because it
                // properly throws when HTML is returned, whereas the syndication reader hangs.
                XmlReader reader = XmlReader.Create(ms, xrs);
                f = System.ServiceModel.Syndication.SyndicationFeed.Load(reader);
            }
            catch (System.Xml.XmlException xmlEx)
            {
                // Some RSS feeds use loosely formatted dates that make the regular
                // SyndicationFeed loader fail; only that failure is retried.
                // TargetSite can be null, so guard before reading its name.
                if (xmlEx.TargetSite != null && xmlEx.TargetSite.Name == "DateFromString")
                {
                    ms.Position = 0;
                    XmlReader lenientReader = new SyndicationFeedXmlReader(ms);
                    f = System.ServiceModel.Syndication.SyndicationFeed.Load(lenientReader);
                }
                else
                {
                    // Rethrow without resetting the stack trace.
                    throw;
                }
            }

            // Make sure the SourceFeed property is set on items and that Summary/Content
            // are never left null when the feed carries the data elsewhere.
            foreach (SyndicationItem item in f.Items)
            {
                item.SourceFeed = f;

                if (item.Summary == null)
                {
                    item.Summary = new TextSyndicationContent("");
                }

                if (item.Content == null)
                {
                    // Fall back to an extension element named "encoded"; if several are
                    // present the last one wins.
                    foreach (SyndicationElementExtension ext in item.ElementExtensions)
                    {
                        XElement element = ext.GetObject<XElement>();
                        if (element.Name.LocalName == "encoded")
                        {
                            item.Content = new TextSyndicationContent(element.Value);
                        }
                    }
                }
            }
        }
    }
    catch (System.Net.WebException wex)
    {
        // Error getting the XML document.
        if (wex.Response != null)
        {
            System.Diagnostics.Debug.WriteLine("Error retreiving XML document from '" + feedUrl + "': " + wex.Response.ToString());
        }
        else if (wex.InnerException != null)
        {
            System.Diagnostics.Debug.WriteLine("Error retreiving XML document from '" + feedUrl + "': " + wex.InnerException.Message);
        }
    }
    catch (System.Xml.XmlException xmlEx)
    {
        // Error parsing the response into an XML document.
        System.Diagnostics.Debug.WriteLine("Error parsing XML document from '" + feedUrl + "': " + xmlEx.Message);
    }

    System.Diagnostics.Debug.WriteLine("Retreived " + f.Items.Count());
    return f;
}
/// <summary>
/// Imports the items of the feed at <paramref name="rssPath"/> as blog entries under
/// <paramref name="pageId"/>, rewrites links and media that point back into the source
/// blog, and appends old-URL to new-URL remappings to
/// <c>~\App_Data\RequestUrlRemappings.xml</c>.
/// </summary>
/// <param name="rssPath">Address of the RSS/Atom feed to import.</param>
/// <param name="pageId">Id of the page the imported entries are attached to.</param>
/// <remarks>
/// A failure while importing a single item is logged and the import continues with the
/// next item. Failures while loading the feed, probing redirects or writing the
/// remapping file are not handled here and propagate to the caller.
/// </remarks>
public void Import(string rssPath, Guid pageId)
{
    using (var conn = new DataConnection(PublicationScope.Unpublished))
    {
        var mapLinks = new Dictionary<string, string>();

        // Scope client, stream and reader so they are released even if Load throws.
        SyndicationFeed feed;
        using (var client = new WebClient())
        using (var stream = client.OpenRead(rssPath))
        using (XmlReader reader = new SyndicationFeedXmlReader(stream))
        {
            feed = SyndicationFeed.Load(reader);
        }

        // Links declared on the feed itself; used below as URL prefixes to recognise
        // addresses that belong to the blog being imported.
        var links = feed.Links.Select(d => d.Uri.ToString()).ToList();
        var defaultAuthor = DataFacade.GetData<Authors>().Select(d => d.Name).TheOneOrDefault() ?? "Anonymous";
        var blogAuthor = feed.Authors.Select(d => d.Name).FirstOrDefault()
            ?? feed.ElementExtensions.ReadElementExtensions<string>("creator", "http://purl.org/dc/elements/1.1/").FirstOrDefault();

        // One fallback timestamp for items without a publish date, so the internal URL
        // built in the first pass and the entry date set in the second pass agree.
        var importTime = DateTime.Now;

        // Pass 1: map every item link to the internal URL its entry will get, so that
        // cross-references between posts can be rewritten in pass 2.
        foreach (var item in feed.Items)
        {
            using (new DataScope(PublicationScope.Published))
            {
                var itemDate = item.PublishDate == DateTimeOffset.MinValue ? importTime : item.PublishDate.DateTime;
                foreach (var itemLink in item.Links)
                {
                    mapLinks[itemLink.Uri.OriginalString] = BlogFacade.BuildBlogInternalPageUrl(itemDate, item.Title.Text, pageId);
                }
            }
        }

        // Pass 2: create the entries.
        foreach (var item in feed.Items)
        {
            try
            {
                var itemDate = item.PublishDate == DateTimeOffset.MinValue ? importTime : item.PublishDate.DateTime;

                // Body text, in order of preference: text content, content:encoded, summary.
                string text = null;
                var syndicationContent = item.Content as TextSyndicationContent;
                if (syndicationContent != null)
                {
                    text = syndicationContent.Text;
                }
                if (text == null)
                {
                    text = item.ElementExtensions
                        .ReadElementExtensions<string>("encoded", "http://purl.org/rss/1.0/modules/content/")
                        .FirstOrDefault();
                }
                if (text == null && item.Summary != null)
                {
                    text = item.Summary.Text;
                }

                XDocument content = MarkupTransformationServices.TidyHtml(text).Output;

                // Somewhere an empty <title></title> gets created; strip those.
                foreach (var title in content.Descendants(Namespaces.Xhtml + "title").ToList())
                {
                    if (string.IsNullOrWhiteSpace(title.Value))
                    {
                        title.Remove();
                    }
                }

                // Images hosted by the source blog are imported and re-pointed.
                foreach (var img in content.Descendants(Namespaces.Xhtml + "img"))
                {
                    var src = img.GetAttributeValue("src");
                    if (string.IsNullOrEmpty(src) || !links.Any(link => src.StartsWith(link, StringComparison.Ordinal)))
                    {
                        continue;
                    }

                    var newImage = ImportMedia(src, string.Format(FolderFormat, itemDate, item.Title.Text));
                    if (newImage != null)
                    {
                        img.SetAttributeValue("src", MediaUrlHelper.GetUrl(newImage, true));
                    }
                }

                // Anchors into the source blog: known posts map to their new internal URL,
                // media files are imported, anything else becomes a site-relative path.
                foreach (var a in content.Descendants(Namespaces.Xhtml + "a"))
                {
                    var href = a.GetAttributeValue("href");
                    if (string.IsNullOrEmpty(href) || !links.Any(link => href.StartsWith(link, StringComparison.Ordinal)))
                    {
                        continue;
                    }

                    string mappedUrl;
                    if (mapLinks.TryGetValue(href, out mappedUrl))
                    {
                        a.SetAttributeValue("href", mappedUrl);
                        continue;
                    }

                    // Invariant lower-casing keeps ".GIF" matching under every culture.
                    switch (Path.GetExtension(href).ToLowerInvariant())
                    {
                        case ".jpg":
                        case ".png":
                        case ".gif":
                        case ".pdf":
                        case ".doc":
                        case ".docx":
                            var newMedia = ImportMedia(href, string.Format(FolderFormat, itemDate, item.Title.Text));
                            // Same guard as for images: leave the link untouched when the
                            // media could not be imported.
                            if (newMedia != null)
                            {
                                a.SetAttributeValue("href", MediaUrlHelper.GetUrl(newMedia, true));
                            }
                            break;
                        default:
                            a.SetAttributeValue("href", new Uri(href).PathAndQuery);
                            break;
                    }
                }

                var blogItem = DataFacade.BuildNew<Entries>();

                // Reuse a GUID embedded in the feed item id when it is not already taken.
                // Items without an id simply keep the id BuildNew produced.
                if (item.Id != null)
                {
                    var match = Regex.Match(item.Id, @"\b[A-F0-9]{8}(?:-[A-F0-9]{4}){3}-[A-F0-9]{12}\b", RegexOptions.IgnoreCase);
                    Guid id;
                    if (match.Success
                        && Guid.TryParse(match.Groups[0].Value, out id)
                        && id != Guid.Empty
                        && !DataFacade.GetData<Entries>(d => d.Id == id).Any())
                    {
                        blogItem.Id = id;
                    }
                }

                blogItem.Title = item.Title.Text;
                blogItem.PageId = pageId;
                blogItem.Teaser = string.Empty;

                var blogItemAuthor = item.Authors.Select(d => d.Name ?? d.Email).FirstOrDefault()
                    ?? item.ElementExtensions.ReadElementExtensions<string>("creator", "http://purl.org/dc/elements/1.1/").FirstOrDefault();
                blogItem.Author = ImportAuthor(blogItemAuthor ?? blogAuthor ?? defaultAuthor);

                var tagType = DataFacade.GetData<TagType>().FirstOrDefault();
                if (tagType == null)
                {
                    tagType = DataFacade.BuildNew<TagType>();
                    tagType.Name = "Categories";
                    DataFacade.AddNew(tagType);
                }

                foreach (var tag in item.Categories)
                {
                    ImportTag(tag.Name, tagType.Id);
                }

                blogItem.Tags = string.Join(",", item.Categories.Select(d => d.Name));
                blogItem.Content = content.ToString();
                blogItem.Date = itemDate;

                // Added as a draft first, then switched to published and updated.
                blogItem.PublicationStatus = GenericPublishProcessController.Draft;
                blogItem = DataFacade.AddNew(blogItem);
                blogItem.PublicationStatus = GenericPublishProcessController.Published;
                DataFacade.Update(blogItem);
            }
            catch (Exception ex)
            {
                Log.LogError("Import Blog", ex);
            }
        }

        // Old URLs may themselves redirect to other addresses on the source blog; follow
        // up to two hops so those addresses are remapped too.
        var firstHop = CollectRedirectTargets(mapLinks, links, mapLinks);
        CollectRedirectTargets(firstHop, links, mapLinks);

        var mapFile = PathUtil.Resolve(@"~\App_Data\RequestUrlRemappings.xml");
        var map = File.Exists(mapFile) ? XElement.Load(mapFile) : new XElement("RequestUrlRemappings");

        map.Add(new XComment(" Imported Blog " + DateTime.Now));
        map.Add(
            mapLinks.Select(d => new XElement("Remapping",
                new XAttribute("requestPath", new Uri(d.Key).PathAndQuery),
                new XAttribute("rewritePath", d.Value))));
        map.Add(new XComment(" "));
        map.Save(mapFile);
    }
}

/// <summary>
/// Requests each pending URL without following redirects and, when the response points
/// to another address inside the source blog that is not mapped yet, maps that address
/// to the same target.
/// </summary>
/// <param name="pending">URL-to-target pairs to probe; a snapshot is taken, so this may be <paramref name="mapLinks"/> itself.</param>
/// <param name="links">URL prefixes that identify addresses belonging to the source blog.</param>
/// <param name="mapLinks">The full mapping; newly discovered addresses are added to it.</param>
/// <returns>Only the mappings discovered by this call.</returns>
private static Dictionary<string, string> CollectRedirectTargets(
    IEnumerable<KeyValuePair<string, string>> pending,
    List<string> links,
    Dictionary<string, string> mapLinks)
{
    var discovered = new Dictionary<string, string>();

    foreach (var maplink in pending.ToList())
    {
        var request = (HttpWebRequest)WebRequest.Create(maplink.Key);
        request.AllowAutoRedirect = false;

        // Dispose the response so its connection is released; unclosed responses keep
        // connections open and can stall later requests to the same host.
        string location;
        using (var response = (HttpWebResponse)request.GetResponse())
        {
            location = response.Headers["Location"];
        }

        if (string.IsNullOrWhiteSpace(location))
        {
            continue;
        }

        // Location may be relative; resolve it against the requested URL.
        location = new Uri(new Uri(maplink.Key), location).OriginalString;

        if (links.Any(link => location.StartsWith(link, StringComparison.Ordinal)) && !mapLinks.ContainsKey(location))
        {
            mapLinks[location] = maplink.Value;
            discovered[location] = maplink.Value;
        }
    }

    return discovered;
}