/// <summary>
/// Downloads the New York Times U.S. RSS feed, stores every entry published after the
/// timestamp recorded for the "NYTimes" source, and records up to three keywords
/// (the first three cleaned tokens of title + description) for each stored entry.
/// </summary>
/// <returns>The default view of this action.</returns>
public ActionResult Index()
{
    const string sourceName = "NYTimes";
    const string feedUrl = "http://rss.nytimes.com/services/xml/rss/nyt/US.xml";

    // Feed timestamps are machine-readable, so they are parsed culture-independently.
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    /******************************
     * Fetch the RSS address and deserialize the XML.
     ******************************/
    rss newsItems;
    XmlSerializer serializer = new XmlSerializer(typeof(rss));

    // The reader owns the underlying network stream; dispose it as soon as parsing is done.
    // NOTE(review): XmlTextReader processes DTDs by default - confirm that is acceptable for this feed.
    using (XmlReader reader = new XmlTextReader(feedUrl))
    {
        newsItems = (rss)serializer.Deserialize(reader);
    }

    // Nothing to import when the feed carries no entries.
    if (newsItems == null || newsItems.item == null || newsItems.item.Length == 0)
    {
        return View();
    }

    var entries = newsItems.item;

    /******************************
     * Initialise the newest known timestamp (newTime).
     ******************************/
    Models.source source = db.sources.Find(sourceName);
    DateTime newTime;
    if (source == null)
    {
        // First run: seed the source with the date of the last (oldest) entry in the feed.
        // That entry itself is therefore not stored, because only strictly newer entries are.
        newTime = DateTime.Parse(entries[entries.Length - 1].pubDate, culture);
        source = new Models.source(sourceName, newTime);
        db.sources.Add(source);
    }
    else
    {
        newTime = source.newDate.Value;
    }

    // Newest timestamp written to the source so far; keeps newDate from moving backwards
    // when the feed is not strictly ordered.
    DateTime latestStored = newTime;

    char[] separators = new Char[] { ',', '.', ' ', '?', ':', '\'', '‘', '’', '|' };

    // non1..non20 are declared outside this method (presumably Regex filters for noise words);
    // they are applied in their original order.
    var noiseFilters = new[]
    {
        non1, non2, non3, non4, non5, non6, non7, non8, non9, non10,
        non11, non12, non13, non14, non15, non16, non17, non18, non19, non20
    };

    /******************************
     * Add every feed entry that is newer than newTime.
     ******************************/
    for (int i = entries.Length - 1; i >= 0; i--) // oldest entries are stored first
    {
        var entry = entries[i];
        DateTime time = DateTime.Parse(entry.pubDate, culture);

        // Compare with the timestamp the source had when this request started.
        if (time <= newTime)
        {
            continue; // not newer than what is already stored
        }

        if (time > latestStored)
        {
            latestStored = time;
            source.newDate = time; // advance the stored timestamp
        }

        // The description can carry markup between '<' and '>' (e.g. ads); strip it.
        entry.description = Regex.Replace(entry.description ?? string.Empty, "<.*?>", string.Empty);

        string text = (entry.title + " " + entry.description).ToLowerInvariant();
        foreach (var filter in noiseFilters)
        {
            text = filter.Replace(text, "");
        }

        string[] words = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        // Up to three keywords; slots stay empty when the text has fewer than three tokens
        // (indexing words[0..2] unconditionally would throw for short entries).
        string[] keywords = { "", "", "" };
        for (int k = 0; k < keywords.Length && k < words.Length; k++)
        {
            keywords[k] = Regex.Replace(words[k], "[\\s\\p{P}\n\r=<>$>+¥^]", "");
        }

        Models.item item = new Models.item(entry, time, sourceName, 0, keywords[0], keywords[1], keywords[2]);
        db.items.Add(item);

        // Saved here so that item.Id is available for the keyword rows below.
        db.SaveChanges();

        foreach (string keyword in keywords)
        {
            if (keyword.Length == 0)
            {
                continue; // no keyword in this slot, nothing to count
            }

            db.articleKeyword.Add(new ArticleKeyword(keyword, item.Id));

            // Single lookup: bump the running total or start a new one.
            var total = db.keywordsTotal.Find(keyword);
            if (total != null)
            {
                total.keywordSum++;
            }
            else
            {
                db.keywordsTotal.Add(new KeywordsTotal(keyword, 1));
            }
        }
    }

    db.SaveChanges();

    return View();
}
//
// GET: /News/
/// <summary>
/// Downloads the New York Times U.S. RSS feed, stores every entry published after the
/// timestamp recorded for the "NYTimes" source, runs the keyword analyzer on each stored
/// entry and saves its three highest-TFIDF words on the entry.
/// </summary>
/// <returns>The view, with all stored items as its model.</returns>
public ActionResult Index()
{
    const string sourceName = "NYTimes";
    const string feedUrl = "http://rss.nytimes.com/services/xml/rss/nyt/US.xml";

    // Feed timestamps are machine-readable, so they are parsed culture-independently.
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    // Baseline timestamp used to seed the source row and to detect older stored values.
    DateTime baseline = DateTime.Parse("Sat, 07 Feb 2015 00:57:00 GMT", culture);

    /******************************
     * Fetch the RSS address and deserialize the XML.
     ******************************/
    rss cars;
    XmlSerializer serializer = new XmlSerializer(typeof(rss));

    // The reader owns the underlying network stream; dispose it as soon as parsing is done.
    using (XmlReader reader = new XmlTextReader(feedUrl))
    {
        cars = (rss)serializer.Deserialize(reader);
    }

    // Nothing to import when the feed carries no entries; just show what is stored.
    if (cars == null || cars.item == null || cars.item.Length == 0)
    {
        return View(db.items.ToList());
    }

    var entries = cars.item;

    /******************************
     * Initialise the newest known timestamp (newTime).
     ******************************/
    Models.source source = db.sources.Find(sourceName);
    if (source == null)
    {
        // Initialise the database with the baseline timestamp.
        source = new Models.source(sourceName, baseline);
        db.sources.Add(source);
    }

    DateTime newTime;
    if (source.newDate.Value < baseline)
    {
        // Stored timestamp predates the baseline: restart from the last (oldest) feed entry.
        // The existing source is updated in place; adding a second source with the same
        // name would put two entities with one key into the context.
        newTime = DateTime.Parse(entries[entries.Length - 1].pubDate, culture);
        source.newDate = newTime;
    }
    else
    {
        newTime = source.newDate.Value;
    }

    // Newest timestamp written to the source so far; keeps newDate from moving backwards
    // when the feed is not strictly ordered.
    DateTime latestStored = newTime;

    /******************************
     * Add every feed entry that is newer than newTime.
     ******************************/
    for (int i = entries.Length - 1; i >= 0; i--) // oldest entries are stored first
    {
        var entry = entries[i];
        DateTime time = DateTime.Parse(entry.pubDate, culture);

        // Compare with the timestamp the source had when this request started.
        if (time <= newTime)
        {
            continue; // not newer than what is already stored
        }

        if (time > latestStored)
        {
            latestStored = time;
            source.newDate = time; // advance the stored timestamp
        }

        // The description can carry markup between '<' and '>' (e.g. ads); strip it.
        entry.description = Regex.Replace(entry.description ?? string.Empty, "<.*?>", string.Empty);

        Models.item item = new Models.item(entry, time, sourceName, 0);
        db.items.Add(item);
        db.SaveChanges(); // save before calling the analyzer, which works from item.Id

        /**********************************
         * For every stored article, update the keyword and artKey statistics.
         **********************************/
        KeywordAnalyzer ka = new KeywordAnalyzer();
        ka.analyze(item);
        // NOTE(review): the original author was unsure whether results saved by the analyzer
        // are visible to the ToList() passed to the view - confirm.
        ka.TFIDF(item.Id); // original note: TFIDF gets more accurate as more data accumulates

        /*****************************
         * Store the result on the article row.
         * NOTE(review): original note says this could not live in a helper because the
         * result would then not reach the view - confirm before refactoring.
         *****************************/
        var articleId = item.Id;
        var topKeywords = db.artKeys
                          .Where(x => x.AId == articleId)
                          .OrderByDescending(x => x.TFIDF)
                          .Take(3); // the three highest-ranked keywords

        // Comma-terminated list, e.g. "a,b,c," (trailing comma kept as in the stored format).
        string str = "";
        foreach (var line in topKeywords)
        {
            str = str + line.word + ",";
        }

        db.items.Find(item.Id).keyword = str;
        db.SaveChanges();
    }

    return View(db.items.ToList());
}
//
// GET: /Xml2Model/
/// <summary>
/// Downloads the CNN U.S. RSS feed and stores every entry published after the timestamp
/// recorded for the "CNN" source. Entries are stored with empty keyword fields.
/// </summary>
/// <returns>The default view of this action.</returns>
public ActionResult Index()
{
    const string sourceName = "CNN";
    const string feedUrl = "http://rss.cnn.com/rss/cnn_us.rss";

    // NOTE(review): the trailing "EST" appears to be matched literally, so a pubDate with
    // any other zone suffix would make ParseExact throw FormatException - confirm the feed.
    const string pubDateFormat = "ddd, dd MMM yyyy HH:mm:ss EST";
    CultureInfo feedCulture = new CultureInfo("en-US");

    /******************************
     * Fetch the RSS address and deserialize the XML.
     ******************************/
    rss cars;
    XmlSerializer serializer = new XmlSerializer(typeof(rss));

    // The reader owns the underlying network stream; dispose it as soon as parsing is done.
    using (XmlReader reader = new XmlTextReader(feedUrl))
    {
        cars = (rss)serializer.Deserialize(reader);
    }

    // Nothing to import when the feed carries no entries.
    if (cars == null || cars.item == null || cars.item.Length == 0)
    {
        return View();
    }

    var entries = cars.item;

    /******************************
     * Initialise the newest known timestamp (newTime).
     ******************************/
    Models.source source = db.sources.Find(sourceName);
    DateTime newTime;
    if (source == null)
    {
        // First run: seed the source with the date of the last (oldest) entry in the feed.
        // That entry itself is therefore not stored, because only strictly newer entries are.
        // Every parsed timestamp is shifted back one hour, as in the loop below.
        newTime = DateTime.ParseExact(entries[entries.Length - 1].pubDate, pubDateFormat, feedCulture).AddHours(-1);
        source = new Models.source(sourceName, newTime);
        db.sources.Add(source);
    }
    else
    {
        newTime = source.newDate.Value;
    }

    // Newest timestamp written to the source so far; keeps newDate from moving backwards
    // when the feed is not strictly ordered.
    DateTime latestStored = newTime;

    /******************************
     * Add every feed entry that is newer than newTime.
     ******************************/
    for (int i = entries.Length - 1; i >= 0; i--) // oldest entries are stored first
    {
        var entry = entries[i];
        DateTime time = DateTime.ParseExact(entry.pubDate, pubDateFormat, feedCulture).AddHours(-1);

        // Compare with the timestamp the source had when this request started.
        if (time <= newTime)
        {
            continue; // not newer than what is already stored
        }

        if (time > latestStored)
        {
            latestStored = time;
            source.newDate = time; // advance the stored timestamp
        }

        // No keyword extraction for this source: the three keyword fields are left empty.
        Models.item item = new Models.item(entry, time, sourceName, 0, "", "", "");
        db.items.Add(item);
    }

    // One save for the source row and all new items.
    db.SaveChanges();

    return View();
}