Beispiel #1
0
        /// <summary>
        /// Sends <paramref name="url"/> to the Diffbot article API, stores the first
        /// extracted object (serialized as JSON together with the current UTC time)
        /// in <c>db</c> under the key <c>html:{siteName}:{title}</c>, and renders the
        /// article view.
        /// </summary>
        /// <param name="url">Escaped URL of the page to extract; it is unescaped before being sent.</param>
        /// <param name="title">
        /// Title used as the last segment of the storage key. Colons are stripped from it
        /// (presumably because ':' separates the key segments).
        /// </param>
        /// <returns>
        /// The <c>ViewArticle</c> view. <c>ViewBag.Article</c> holds the deserialized API
        /// response, or an empty string when the response could not be processed.
        /// </returns>
        public IActionResult Extract(string url, string title)
        {
            Console.WriteLine("\n\nURL: {0},  \nTITLE: {1} \n\n", url, title);

            DateTime now     = DateTime.UtcNow;
            dynamic  article = "";

            // NOTE(review): the API token is hard-coded in source. It should be loaded
            // from configuration / a secret store and this value rotated.
            var TOKEN   = "4305b7c99372aca246ab9a79fb8658fe";
            var client  = new RestClient("https://api.diffbot.com/v3/article");
            var request = new RestRequest(Method.GET);

            request.AddParameter("token", TOKEN);
            request.AddParameter("url", Uri.UnescapeDataString(url));

            // The wait handle wraps an OS resource, so it is disposed once the
            // callback has signalled completion instead of being left to the finalizer.
            using (EventWaitHandle done = new AutoResetEvent(false))
            {
                client.ExecuteAsync(request, response =>
                {
                    try
                    {
                        string content = response.Content;
                        dynamic extract = JsonConvert.DeserializeObject(content);
                        dynamic objects = extract.objects;

                        // Work on a local copy rather than mutating the captured parameter.
                        string keyTitle = title.Replace(":", "");

                        var site_name = objects[0].siteName;
                        if (site_name == null)
                        {
                            site_name = "GENERAL";
                        }

                        var sourceKey = string.Format("html:{0}:{1}", site_name, keyTitle);
                        var html      = JsonConvert.SerializeObject(objects[0]);
                        var obj       = new ExtractedArticle
                        {
                            date    = now.ToString("u"),
                            content = html
                        };

                        string s = JsonConvert.SerializeObject(obj);
                        db.StringSet(sourceKey, s);
                        article = extract;
                    }
                    catch (Exception e)
                    {
                        // Best effort: a failed extraction is logged and the view is
                        // rendered with the empty default article.
                        Console.WriteLine("General Exception caught: " + e);
                    }
                    finally
                    {
                        // Always release the request thread, even if the handler above fails.
                        done.Set();
                    }
                });

                // Block the request thread until the callback has finished.
                done.WaitOne();
            }

            ViewBag.Article = article;
            return(View("~/Views/Home/ViewArticle.cshtml"));
        }
Beispiel #2
0
        /// <summary>
        /// Creates an <c>ExtractedArticle</c> for every article that does not yet have one
        /// with the same <c>Sha256Hash</c>. The title is cleaned with
        /// <c>DataLayer.ClearText</c>; the summary is taken from <c>/item/description</c>
        /// and the categories (joined with ", ") from <c>/item/category</c> of the
        /// article's <c>RSS20</c> XML. Each record is saved individually.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var count = 0;

            // Dispose the context (and its database connection) when the run ends.
            using (var _context = new Db())
            {
                // Materialize the work list up front so the loop does not keep a
                // reader open on the connection while it saves.
                var pending = _context.Articles
                    .Where(a => !_context.ExtractedArticles.Select(b => b.Sha256Hash).Contains(a.Sha256Hash))
                    .ToArray();

                foreach (var article in pending)
                {
                    // Guards against the same hash appearing more than once in the work list.
                    if (_context.ExtractedArticles.Find(article.Sha256Hash) != null)
                    {
                        continue;
                    }

                    var ea = new ExtractedArticle
                    {
                        Sha256Hash   = article.Sha256Hash,
                        FeedID       = article.FeedID,
                        PublishedUTC = article.PublishedUTC,
                        Title        = DataLayer.ClearText(article.Title)
                    };

                    var rss = new XmlDocument();
                    rss.LoadXml(article.RSS20);

                    var node = rss.SelectSingleNode("/item/description");
                    if (node != null)
                    {
                        ea.Summary = DataLayer.ClearText(node.InnerText);
                    }

                    var xmlNodeList = rss.SelectNodes("/item/category");
                    if (xmlNodeList != null)
                    {
                        foreach (XmlNode n in xmlNodeList)
                        {
                            if (ea.Categories == null)
                            {
                                ea.Categories = n.InnerText;
                            }
                            else
                            {
                                ea.Categories += ", " + n.InnerText;
                            }
                        }
                    }

                    _context.ExtractedArticles.AddOrUpdate(ea);
                    Console.WriteLine(count++ + " " + ea.Sha256Hash[0] + " " + ea.Title);

                    try
                    {
                        _context.SaveChanges();
                    }
                    catch (System.Data.Entity.Validation.DbEntityValidationException e)
                    {
                        // Skip the invalid record, but report why and stop tracking it.
                        // If it stayed in the Added state, every later SaveChanges()
                        // would re-validate it and fail as well.
                        foreach (var result in e.EntityValidationErrors)
                        {
                            foreach (var error in result.ValidationErrors)
                            {
                                Console.Error.WriteLine("Validation failed for \"" + ea.Title + "\": "
                                                        + error.PropertyName + " - " + error.ErrorMessage);
                            }
                            result.Entry.State = System.Data.Entity.EntityState.Detached;
                        }
                    }
                    // Any other exception (e.g. EntityException) propagates unchanged.
                }
            }
        }