Exemple #1
0
        /// <summary>
        /// Downloads the feed of every entry in <c>db.Sources</c>, hands each feed item to
        /// <c>AddNews</c>, and records the time of the attempt on the source.
        /// </summary>
        /// <remarks>
        /// Any exception raised while fetching, parsing or adding the items of one feed is
        /// written to the console and does not stop the remaining sources from being processed.
        /// <c>LastScrape</c> is updated and saved for every source, whether or not its fetch
        /// succeeded.
        /// </remarks>
        public void FetchAll()
        {
            var sources = db.Sources.ToList();

            foreach (Source source in sources)
            {
                try
                {
                    // WebClient is IDisposable: scope it to this one download so that a client
                    // is not leaked for every source.
                    using (WebClient client = new WebClient())
                    using (XmlReader reader = new SyndicationFeedXmlReader(client.OpenRead(source.RSSurl)))
                    {
                        SyndicationFeed feed = SyndicationFeed.Load(reader);

                        foreach (SyndicationItem syndicationItem in feed.Items)
                        {
                            AddNews(syndicationItem, source);
                        }
                    }
                }
                catch (Exception e)
                {
                    // Best effort: report the failure and carry on with the next source.
                    Console.WriteLine(e);
                }

                // Stamp and persist the attempt even when the fetch above failed.
                source.LastScrape = DateTime.Now;
                db.Sources.AddOrUpdate(source);
                db.SaveChanges();
            }
        }
    /// <summary>
    /// Reads every registered RSS feed from the database and returns one row per feed item.
    /// </summary>
    /// <remarks>
    /// Items that throw while being read (for example no link, or no summary) are skipped.
    /// A failure to download or parse a whole feed is not caught here and propagates to the caller.
    /// </remarks>
    /// <returns>
    /// A table with the columns <c>url</c> (first link of the item), <c>hinhanh</c> (URL of the
    /// last <c>img src</c> found in the item summary, or an empty string), <c>IdCatalogy</c> and
    /// <c>IdRSS</c> (both copied from the database row of the feed).
    /// </returns>
    private DataTable getListRSS()
    {
        // Matches the value of an <img src="..."> attribute (single or double quoted).
        const string rx = @"(?<=img\s+src\=[\x27\x22])(?<Url>[^\x27\x22]*)(?=[\x27\x22])";

        DataTable dtRSS = new DataTable();

        dtRSS.Columns.Add("url");
        dtRSS.Columns.Add("hinhanh");
        dtRSS.Columns.Add("IdCatalogy");
        dtRSS.Columns.Add("IdRSS");

        string    sql = "select R.idLoai IdCatalogy,R.idRss RSS,F.Name,F.url url from RSSRigisteds R, RssFeeds F where R.idRSS = F.id ";
        DBClass   _db = new DBClass();
        DataTable dt  = _db.sqlGetData(sql);

        foreach (DataRow dr in dt.Rows)
        {
            string feedUrl = BaseView.GetStringFieldValue(dr, "url");
            if (String.IsNullOrEmpty(feedUrl))
            {
                continue;
            }

            // WebClient is IDisposable: scope it to this one download.
            using (WebClient client = new WebClient())
            using (XmlReader reader = new SyndicationFeedXmlReader(client.OpenRead(feedUrl)))
            {
                SyndicationFeed feed = SyndicationFeed.Load(reader);
                foreach (SyndicationItem album in feed.Items)
                {
                    try
                    {
                        DataRow row = dtRSS.NewRow();
                        row[0] = album.Links[0].Uri;

                        // Extract the image URL; each match overwrites the previous one,
                        // so the last image in the summary wins.
                        string imageUrl = "";
                        foreach (Match m in Regex.Matches(album.Summary.Text, rx, RegexOptions.IgnoreCase | RegexOptions.Multiline))
                        {
                            imageUrl = m.Groups[1].Value;
                            if (imageUrl.StartsWith("//", StringComparison.Ordinal)) // Google RSS has it
                            {
                                // Protocol-relative URL: only prepend the scheme. Replacing
                                // every "//" would also corrupt any later "//" in the path.
                                imageUrl = "http:" + imageUrl;
                            }
                        }

                        row[1] = imageUrl;
                        row[2] = BaseView.GetStringFieldValue(dr, "IdCatalogy");
                        row[3] = BaseView.GetStringFieldValue(dr, "RSS");
                        dtRSS.Rows.Add(row);
                    }
                    catch
                    {
                        // Best effort: an item that cannot be read is left out of the result.
                    }
                }
            }
        }
        return(dtRSS);
    }
    /// <summary>
    /// Downloads every feed returned by <c>_db.get_all_LoaiTin_UrlNotNull_RV()</c> and returns
    /// one row per feed item.
    /// </summary>
    /// <remarks>
    /// This method is best effort: a feed that cannot be downloaded or parsed, and an item that
    /// throws while being read, are silently left out of the result.
    /// </remarks>
    /// <returns>
    /// A table with the columns <c>url</c> (first link of the item), <c>title</c>, <c>date</c>
    /// (publish date), <c>Desc</c> (summary text), <c>urlRss</c> (URL of the feed) and
    /// <c>IdCatalogy</c> (the <c>id</c> field of the database row of the feed).
    /// </returns>
    private DataTable getRSS()
    {
        DataTable dtRSS = new DataTable();

        dtRSS.Columns.Add("url");
        dtRSS.Columns.Add("title");
        dtRSS.Columns.Add("date");
        dtRSS.Columns.Add("Desc");
        dtRSS.Columns.Add("urlRss");
        dtRSS.Columns.Add("IdCatalogy");
        DataTable dt = _db.get_all_LoaiTin_UrlNotNull_RV();

        // Enumerating an empty row collection is a no-op, so no row-count guard is needed.
        foreach (DataRow dr in dt.Rows)
        {
            try
            {
                string feedUrl = BaseView.GetStringFieldValue(dr, "url");
                if (String.IsNullOrEmpty(feedUrl))
                {
                    continue;
                }

                // WebClient is IDisposable: scope it to this one download.
                using (WebClient client = new WebClient())
                using (XmlReader reader = new SyndicationFeedXmlReader(client.OpenRead(feedUrl)))
                {
                    SyndicationFeed feed = SyndicationFeed.Load(reader);
                    foreach (SyndicationItem album in feed.Items)
                    {
                        try
                        {
                            DataRow row = dtRSS.NewRow();
                            row[0] = album.Links[0].Uri;
                            row[1] = album.Title.Text;
                            row[2] = album.PublishDate;
                            row[3] = album.Summary.Text;
                            row[4] = feedUrl;
                            row[5] = BaseView.GetStringFieldValue(dr, "id");
                            dtRSS.Rows.Add(row);
                        }
                        catch
                        {
                            // Best effort: an item that cannot be read is left out of the result.
                        }
                    }
                }
            }
            catch
            {
                // Best effort: a feed that cannot be fetched or parsed is skipped.
            }
        }
        return(dtRSS);
    }
Exemple #4
0
    /// <summary>
    /// Downloads the feed whose URL is entered in <c>txtURL</c> and returns one row per feed item.
    /// </summary>
    /// <remarks>
    /// Items that throw while being read (for example no link, or no summary) are skipped.
    /// A failure to download or parse the feed itself is not caught here and propagates to the caller.
    /// </remarks>
    /// <returns>
    /// A table with the columns <c>url</c> (first link of the item) and <c>hinhanh</c> (URL of the
    /// last <c>img src</c> found in the item summary, or an empty string). The table has no rows
    /// when <c>txtURL</c> is empty.
    /// </returns>
    private DataTable getListRSS()
    {
        // Matches the value of an <img src="..."> attribute (single or double quoted).
        const string rx = @"(?<=img\s+src\=[\x27\x22])(?<Url>[^\x27\x22]*)(?=[\x27\x22])";

        DataTable dtRSS = new DataTable();

        dtRSS.Columns.Add("url");
        dtRSS.Columns.Add("hinhanh");

        string feedUrl = txtURL.Text;
        if (String.IsNullOrEmpty(feedUrl))
        {
            return(dtRSS);
        }

        // WebClient is IDisposable: scope it to this one download.
        using (WebClient client = new WebClient())
        using (XmlReader reader = new SyndicationFeedXmlReader(client.OpenRead(feedUrl)))
        {
            SyndicationFeed feed = SyndicationFeed.Load(reader);
            foreach (SyndicationItem album in feed.Items)
            {
                try
                {
                    DataRow row = dtRSS.NewRow();
                    row[0] = album.Links[0].Uri;

                    // Extract the image URL; each match overwrites the previous one,
                    // so the last image in the summary wins.
                    string imageUrl = "";
                    foreach (Match m in Regex.Matches(album.Summary.Text, rx, RegexOptions.IgnoreCase | RegexOptions.Multiline))
                    {
                        imageUrl = m.Groups[1].Value;
                        if (imageUrl.StartsWith("//", StringComparison.Ordinal)) // Google RSS has it
                        {
                            // Protocol-relative URL: only prepend the scheme. Replacing
                            // every "//" would also corrupt any later "//" in the path.
                            imageUrl = "http:" + imageUrl;
                        }
                    }

                    row[1] = imageUrl;
                    dtRSS.Rows.Add(row);
                }
                catch
                {
                    // Best effort: an item that cannot be read is left out of the result.
                }
            }
        }
        return(dtRSS);
    }
Exemple #5
0
        /// <summary>
        /// Loads the syndication feed identified by <paramref name="feedUrl"/> and normalises its items.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the network download is commented out. The feed bytes are read from the
        /// file named by <c>UrlToLocalFileName(feedUrl)</c> when that file exists; otherwise an
        /// empty stream is parsed, which presumably fails as an XML error and yields an empty
        /// feed. Confirm whether the download should be re-enabled outside debug sessions.
        /// <para>
        /// After loading, every item gets its <c>SourceFeed</c> set, a null <c>Summary</c> is
        /// replaced by an empty one, and a null <c>Content</c> is filled from an element
        /// extension whose local name is <c>encoded</c> when one is present.
        /// </para>
        /// </remarks>
        /// <param name="feedUrl">URL of the feed; used to locate the local copy and in debug output.</param>
        /// <returns>
        /// The loaded feed. When a <see cref="System.Net.WebException"/> or
        /// <see cref="System.Xml.XmlException"/> occurs, it is written to the debug output and
        /// the feed that was available at that point is returned (a new, empty
        /// <see cref="SyndicationFeed"/> if loading itself failed). Other exceptions propagate.
        /// </returns>
        public static SyndicationFeed GetFeed(string feedUrl)
        {
            System.Diagnostics.Debug.Write("** Loading '" + feedUrl + "' ... ");

            SyndicationFeed f = new SyndicationFeed();

            // The client is only used by the (currently disabled) download path below; the using
            // block guarantees it is disposed on every exit path.
            using (WebClient client = new WebClient())
            {
                try
                {
                    XmlReader reader;
                    XmlReaderSettings xrs = new XmlReaderSettings();
                    xrs.DtdProcessing = DtdProcessing.Ignore;

                    // Attempt to load feed using standard XMLReader first because it will properly
                    // throw an exception when HTML is returned, whereas Syndication version will
                    // hang.

                    // use this to download remote files to disk for future cached loading during debug.
                    // client.DownloadFile(feedUrl, UrlToLocalFileName(feedUrl));

                    //ms = new System.IO.MemoryStream(client.DownloadData(feedUrl));

                    // Redirection of web Url request to local resource to avoid hammering servers during debug sessions.
                    System.IO.MemoryStream ms;
                    string localFile = UrlToLocalFileName(feedUrl);
                    if (System.IO.File.Exists(localFile))
                    {
                        ms = new System.IO.MemoryStream(System.IO.File.ReadAllBytes(localFile));
                    }
                    else
                    {
                        ms = new System.IO.MemoryStream(); // dummy up a new ms
                    }

                    try
                    {
                        reader = XmlReader.Create(ms, xrs);
                        f = System.ServiceModel.Syndication.SyndicationFeed.Load(reader);
                    }
                    catch (System.Xml.XmlException xmlEx) // specific date format exception handler
                    {
                        // Some RSS feeds use loosely formatted dates that will cause the regular SyndicationFeed to fail.
                        // TargetSite can be null, so guard it before reading the method name.
                        bool isDateFormatFailure = xmlEx.TargetSite != null
                                                   && xmlEx.TargetSite.Name == "DateFromString";
                        if (!isDateFormatFailure)
                        {
                            // Bare rethrow keeps the original stack trace ("throw xmlEx;" would reset it).
                            throw;
                        }

                        ms.Position = 0;
                        // In these cases, use a customized XmlReader that deals with those funky dates.
                        reader = new SyndicationFeedXmlReader(ms);
                        f = System.ServiceModel.Syndication.SyndicationFeed.Load(reader);
                    }

                    // make sure Sourcefeed property is set on items.
                    foreach (SyndicationItem item in f.Items)
                    {
                        item.SourceFeed = f;

                        if (item.Summary == null)
                        {
                            item.Summary = new TextSyndicationContent("");
                        }

                        if (item.Content == null)
                        {
                            foreach (SyndicationElementExtension ext in item.ElementExtensions)
                            {
                                // Materialise the extension once instead of once per use.
                                XElement element = ext.GetObject<XElement>();
                                if (element.Name.LocalName == "encoded")
                                {
                                    item.Content = new TextSyndicationContent(element.Value);
                                }
                            }
                        }
                    }
                }
                catch (System.Net.WebException wex)
                {
                    // error getting the XML document
                    if (wex.Response != null)
                    {
                        System.Diagnostics.Debug.WriteLine("Error retreiving XML document from '" + feedUrl + "': " + wex.Response.ToString());
                    }
                    else if (wex.InnerException != null)
                    {
                        System.Diagnostics.Debug.WriteLine("Error retreiving XML document from '" + feedUrl + "': " + wex.InnerException.Message);
                    }
                }
                catch (System.Xml.XmlException XmlEx)
                {
                    // error parsing response into XML document
                    System.Diagnostics.Debug.WriteLine("Error parsing XML document from '" + feedUrl + "': " + XmlEx.Message);
                }
            }

            System.Diagnostics.Debug.WriteLine("Retreived " + f.Items.Count());
            return f;
        }
        /// <summary>
        /// Imports the feed at <paramref name="rssPath"/> as blog entries of the page
        /// <paramref name="pageId"/> and appends URL remappings (old item URL to new internal blog
        /// URL) to <c>~\App_Data\RequestUrlRemappings.xml</c>.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// <list type="number">
        /// <item>Download and parse the feed.</item>
        /// <item>Build a map from every item link to the internal URL of the future blog entry.</item>
        /// <item>For each item: tidy its HTML, import images/documents hosted under one of the
        /// feed links, rewrite anchors, then create the entry as draft and publish it. A failure
        /// in one item is logged and does not stop the others.</item>
        /// <item>Probe the mapped URLs for HTTP redirects (two levels) and map the redirect
        /// targets as well.</item>
        /// <item>Append all mappings to the remapping file.</item>
        /// </list>
        /// </remarks>
        /// <param name="rssPath">Address of the feed to import.</param>
        /// <param name="pageId">Id of the page the imported entries are attached to.</param>
        public void Import(string rssPath, Guid pageId)
        {
            using (var conn = new DataConnection(PublicationScope.Unpublished))
            {
                var mapLinks = new Dictionary <string, string>();

                // Dispose the client and the reader even when download or parsing fails.
                SyndicationFeed feed;
                using (var client = new WebClient())
                using (XmlReader reader = new SyndicationFeedXmlReader(client.OpenRead(rssPath)))
                {
                    feed = SyndicationFeed.Load(reader);
                }

                var links         = feed.Links.Select(d => d.Uri.ToString()).ToList();
                var defaultAuthor = DataFacade.GetData <Authors>().Select(d => d.Name).TheOneOrDefault() ?? "Anonymous";
                var blogAuthor    = feed.Authors.Select(d => d.Name).FirstOrDefault()
                                    ?? feed.ElementExtensions.ReadElementExtensions <string>("creator", "http://purl.org/dc/elements/1.1/").FirstOrDefault();

                // Pass 1: compute the future internal URL of every item, so that anchors
                // pointing from one post to another can be rewritten in pass 2.
                foreach (var item in feed.Items)
                {
                    using (new DataScope(PublicationScope.Published))
                    {
                        var itemDate = item.PublishDate == DateTimeOffset.MinValue ? DateTime.Now : item.PublishDate.DateTime;
                        foreach (var itemLink in item.Links)
                        {
                            mapLinks[itemLink.Uri.OriginalString] = BlogFacade.BuildBlogInternalPageUrl(itemDate, item.Title.Text, pageId);
                        }
                    }
                }

                // Pass 2: create one blog entry per item.
                foreach (var item in feed.Items)
                {
                    try
                    {
                        string text     = null;
                        var    itemDate = item.PublishDate == DateTimeOffset.MinValue ? DateTime.Now : item.PublishDate.DateTime;

                        // Body text, in order of preference: text content, content:encoded, summary.
                        if (item.Content != null)
                        {
                            var syndicationContent = item.Content as TextSyndicationContent;
                            if (syndicationContent != null)
                            {
                                text = syndicationContent.Text;
                            }
                        }
                        if (text == null)
                        {
                            text = item.ElementExtensions.ReadElementExtensions <string>("encoded", "http://purl.org/rss/1.0/modules/content/")
                                   .FirstOrDefault();
                        }
                        if (text == null && item.Summary != null)
                        {
                            text = item.Summary.Text;
                        }

                        var content = MarkupTransformationServices.TidyHtml(text).Output;

                        //somewhere empty <title></title> created
                        foreach (var title in content.Descendants(Namespaces.Xhtml + "title").ToList())
                        {
                            if (string.IsNullOrWhiteSpace(title.Value))
                            {
                                title.Remove();
                            }
                        }

                        // Images hosted under one of the feed links are copied into the media library.
                        foreach (var img in content.Descendants(Namespaces.Xhtml + "img"))
                        {
                            var src = img.GetAttributeValue("src");
                            if (!string.IsNullOrEmpty(src))
                            {
                                foreach (var link in links)
                                {
                                    // URLs are compared ordinally, not by culture rules.
                                    if (src.StartsWith(link, StringComparison.Ordinal))
                                    {
                                        var newImage = ImportMedia(src, string.Format(FolderFormat, itemDate, item.Title.Text));
                                        if (newImage != null)
                                        {
                                            img.SetAttributeValue("src", MediaUrlHelper.GetUrl(newImage, true));
                                        }
                                        break;
                                    }
                                }
                            }
                        }

                        // Anchors pointing under one of the feed links are rewritten: to the new
                        // internal URL of a known post, to an imported media file, or to a
                        // site-relative path.
                        foreach (var a in content.Descendants(Namespaces.Xhtml + "a"))
                        {
                            var href = a.GetAttributeValue("href");
                            if (!string.IsNullOrEmpty(href))
                            {
                                foreach (var link in links)
                                {
                                    if (href.StartsWith(link, StringComparison.Ordinal))
                                    {
                                        string internalUrl;
                                        if (mapLinks.TryGetValue(href, out internalUrl))
                                        {
                                            a.SetAttributeValue("href", internalUrl);
                                        }
                                        else
                                        {
                                            // Invariant lower-casing: under e.g. a Turkish culture
                                            // ".GIF".ToLower() would not yield ".gif".
                                            var extension = Path.GetExtension(href).ToLowerInvariant();
                                            switch (extension)
                                            {
                                            case ".jpg":
                                            case ".png":
                                            case ".gif":
                                            case ".pdf":
                                            case ".doc":
                                            case ".docx":
                                                var newMedia = ImportMedia(href, string.Format(FolderFormat, itemDate, item.Title.Text));
                                                // Same null check as the <img> branch above: when
                                                // nothing was imported, keep the original href.
                                                if (newMedia != null)
                                                {
                                                    a.SetAttributeValue("href", MediaUrlHelper.GetUrl(newMedia, true));
                                                }
                                                break;

                                            default:
                                                a.SetAttributeValue("href", new Uri(href).PathAndQuery);
                                                break;
                                            }
                                        }
                                        break;
                                    }
                                }
                            }
                        }

                        var blogItem = DataFacade.BuildNew <Entries>();

                        // Reuse the GUID embedded in the item id when there is one and it is still free.
                        // item.Id may be null (no id/guid element); Regex.Match rejects a null input.
                        var match = Regex.Match(item.Id ?? string.Empty, @"\b[A-F0-9]{8}(?:-[A-F0-9]{4}){3}-[A-F0-9]{12}\b", RegexOptions.IgnoreCase);
                        if (match.Success)
                        {
                            var id = Guid.Empty;
                            Guid.TryParse(match.Groups[0].Value, out id);
                            if (id != Guid.Empty && !DataFacade.GetData <Entries>(d => d.Id == id).Any())
                            {
                                blogItem.Id = id;
                            }
                        }

                        blogItem.Title  = item.Title.Text;
                        blogItem.PageId = pageId;
                        blogItem.Teaser = string.Empty;

                        var blogItemAuthor = item.Authors.Select(d => d.Name ?? d.Email).FirstOrDefault() ??
                                             item.ElementExtensions.ReadElementExtensions <string>("creator",
                                                                                                   "http://purl.org/dc/elements/1.1/").FirstOrDefault();

                        // Author fallback chain: item author, then feed author, then default author.
                        blogItem.Author = ImportAuthor(blogItemAuthor ?? blogAuthor ?? defaultAuthor);

                        var tagType = DataFacade.GetData <TagType>().FirstOrDefault();
                        if (tagType == null)
                        {
                            tagType      = DataFacade.BuildNew <TagType>();
                            tagType.Name = "Categories";
                            DataFacade.AddNew(tagType);
                        }

                        foreach (var tag in item.Categories)
                        {
                            ImportTag(tag.Name, tagType.Id);
                        }
                        blogItem.Tags = string.Join(",", item.Categories.Select(d => d.Name));

                        blogItem.Content = content.ToString();
                        blogItem.Date    = itemDate;

                        // The entry is added as draft first and then updated to published.
                        blogItem.PublicationStatus = GenericPublishProcessController.Draft;
                        blogItem = DataFacade.AddNew(blogItem);
                        blogItem.PublicationStatus = GenericPublishProcessController.Published;
                        DataFacade.Update(blogItem);
                    }
                    catch (Exception ex)
                    {
                        Log.LogError("Import Blog", ex);
                    }
                }

                // 1st redirect: targets of the original item URLs.
                var mapLinks2 = new Dictionary <string, string>();
                AddRedirectMappings(mapLinks.ToList(), links, mapLinks, mapLinks2);

                // 2nd redirect: targets of the URLs discovered by the first probe.
                AddRedirectMappings(mapLinks2.ToList(), links, mapLinks, null);

                var mapFile = PathUtil.Resolve(@"~\App_Data\RequestUrlRemappings.xml");
                var map     = new XElement("RequestUrlRemappings");
                if (File.Exists(mapFile))
                {
                    map = XElement.Load(mapFile);
                }

                map.Add(new XComment(" Imported Blog " + DateTime.Now));
                map.Add(
                    mapLinks.Select(d => new XElement("Remapping",
                                                      new XAttribute("requestPath", new Uri(d.Key).PathAndQuery),
                                                      new XAttribute("rewritePath", d.Value)
                                                      ))

                    );
                map.Add(new XComment(" "));

                map.Save(mapFile);
            }
        }

        /// <summary>
        /// Requests every candidate URL without following redirects and, when the response
        /// carries a <c>Location</c> header pointing under one of <paramref name="links"/>,
        /// maps that redirect target to the same internal URL as the candidate.
        /// </summary>
        /// <param name="candidates">Snapshot of (source URL, internal URL) pairs to probe.</param>
        /// <param name="links">Feed link prefixes a redirect target must start with.</param>
        /// <param name="mapLinks">Map receiving the new entries; existing keys are never overwritten.</param>
        /// <param name="newlyMapped">Optional map that also receives every entry added by this call; may be null.</param>
        /// <remarks>
        /// A probe that fails with a <see cref="WebException"/> is logged and skipped so that one
        /// unreachable URL does not prevent the remapping file from being written.
        /// </remarks>
        private void AddRedirectMappings(
            IEnumerable <KeyValuePair <string, string> > candidates,
            ICollection <string> links,
            IDictionary <string, string> mapLinks,
            IDictionary <string, string> newlyMapped)
        {
            foreach (var candidate in candidates)
            {
                string location;
                try
                {
                    var request = (HttpWebRequest)WebRequest.Create(candidate.Key);
                    request.AllowAutoRedirect = false;

                    // The response must be disposed; otherwise its connection is held open and
                    // later requests to the same host can block.
                    using (var response = (HttpWebResponse)request.GetResponse())
                    {
                        location = response.Headers["Location"];
                    }
                }
                catch (WebException ex)
                {
                    Log.LogError("Import Blog", ex);
                    continue;
                }

                if (string.IsNullOrWhiteSpace(location))
                {
                    continue;
                }

                // The Location header may be relative; resolve it against the requested URL.
                location = new Uri(new Uri(candidate.Key), location).OriginalString;

                if (mapLinks.ContainsKey(location))
                {
                    continue;
                }

                if (links.Any(link => location.StartsWith(link, StringComparison.Ordinal)))
                {
                    mapLinks[location] = candidate.Value;
                    if (newlyMapped != null)
                    {
                        newlyMapped[location] = candidate.Value;
                    }
                }
            }
        }