/// <summary>
/// Requests an article extraction for <paramref name="url"/> from the Diffbot Article API,
/// stores the first extracted object through <c>db.StringSet</c>, and renders the
/// ViewArticle view with the API response.
/// </summary>
/// <param name="url">Escaped article URL; it is unescaped before being sent to the API.</param>
/// <param name="title">Article title; colons are stripped before it becomes part of the storage key.</param>
/// <returns>
/// The <c>~/Views/Home/ViewArticle.cshtml</c> view. <c>ViewBag.Article</c> holds the deserialized
/// API response, or an empty string when the request or the processing of its result failed.
/// </returns>
public IActionResult Extract(string url, string title)
{
    Console.WriteLine("\n\nURL: {0}, \nTITLE: {1} \n\n", url, title);

    DateTime now = DateTime.UtcNow;
    dynamic article = "";

    // SECURITY(review): this API token is committed to source control. It should be rotated
    // and loaded from configuration or a secret store instead of living in the code.
    var TOKEN = "4305b7c99372aca246ab9a79fb8658fe";

    var client = new RestClient("https://api.diffbot.com/v3/article");
    var request = new RestRequest(Method.GET);
    request.AddParameter("token", TOKEN);
    request.AddParameter("url", Uri.UnescapeDataString(url));

    // The action blocks until the callback has run. The wait handle wraps an OS handle,
    // so it is disposed as soon as the wait is over.
    using (var completed = new AutoResetEvent(false))
    {
        client.ExecuteAsync(request, response =>
        {
            try
            {
                string content = response.Content;
                dynamic extract = JsonConvert.DeserializeObject(content);
                dynamic objects = extract.objects;

                // ':' separates the segments of the storage key, so it is removed from the title.
                // A local is used instead of reassigning the captured parameter.
                string cleanTitle = title.Replace(":", "");

                var site_name = objects[0].siteName;
                if (site_name == null)
                {
                    site_name = "GENERAL";
                }

                var sourceKey = string.Format("html:{0}:{1}", site_name, cleanTitle);
                var html = JsonConvert.SerializeObject(objects[0]);
                var record = new ExtractedArticle
                {
                    date = now.ToString("u"),
                    content = html
                };

                db.StringSet(sourceKey, JsonConvert.SerializeObject(record));
                article = extract;
            }
            catch (Exception e)
            {
                // Best effort: on any failure the view is rendered with the empty default.
                Console.WriteLine("General Exception caught: " + e);
            }
            finally
            {
                // Always release the waiting request thread, whatever happened above.
                completed.Set();
            }
        });

        completed.WaitOne();
    }

    ViewBag.Article = article;
    return View("~/Views/Home/ViewArticle.cshtml");
}
/// <summary>
/// Creates an <c>ExtractedArticle</c> row for every article that does not have one yet.
/// The title is cleaned with <c>DataLayer.ClearText</c>; the summary and categories are read
/// from the article's stored RSS 2.0 item XML. Changes are saved one article at a time.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // NOTE(review): assumes Db is an Entity Framework DbContext (and therefore IDisposable) - confirm.
    using (var _context = new Db())
    {
        var count = 0;

        var pending = _context.Articles
            .Where(a => !_context.ExtractedArticles.Select(b => b.Sha256Hash).Contains(a.Sha256Hash))
            .ToArray();

        foreach (var article in pending)
        {
            // The query above ran once, before the loop; this lookup re-checks by key
            // for each article before a new row is created.
            if (_context.ExtractedArticles.Find(article.Sha256Hash) != null)
            {
                continue;
            }

            var ea = new ExtractedArticle
            {
                Sha256Hash = article.Sha256Hash,
                FeedID = article.FeedID,
                PublishedUTC = article.PublishedUTC,
                Title = DataLayer.ClearText(article.Title)
            };

            var rss = new XmlDocument();
            rss.LoadXml(article.RSS20);

            var description = rss.SelectSingleNode("/item/description");
            if (description != null)
            {
                ea.Summary = DataLayer.ClearText(description.InnerText);
            }

            // NOTE: /item/content is not extracted; the earlier implementation was disabled.

            // Categories stays null when the item has no <category> elements.
            var categoryNodes = rss.SelectNodes("/item/category");
            if (categoryNodes != null && categoryNodes.Count > 0)
            {
                ea.Categories = string.Join(", ", categoryNodes.Cast<XmlNode>().Select(n => n.InnerText));
            }

            _context.ExtractedArticles.AddOrUpdate(ea);
            Console.WriteLine(count++ + " " + ea.Sha256Hash[0] + " " + ea.Title);

            try
            {
                _context.SaveChanges();
            }
            catch (System.Data.Entity.Validation.DbEntityValidationException e)
            {
                // Skipping an invalid article is deliberate, but it must not be silent, and the
                // invalid entity must leave the change tracker: otherwise every later
                // SaveChanges() validates it again and fails for all remaining articles.
                foreach (var failure in e.EntityValidationErrors)
                {
                    foreach (var error in failure.ValidationErrors)
                    {
                        Console.Error.WriteLine(
                            "Validation failed for article '" + ea.Title + "': " +
                            error.PropertyName + " - " + error.ErrorMessage);
                    }

                    ((System.Data.Entity.Infrastructure.IObjectContextAdapter)_context)
                        .ObjectContext.Detach(failure.Entry.Entity);
                }
            }
            // Any other exception (including EntitySqlException / EntityException) propagates
            // to the caller unchanged, exactly as the former catch-and-rethrow clauses did.
        }
    }
}