Example #1
0
        //
        // GET: /News/

        public ActionResult Index()
        {
            //Linq 语法 计算在likes 中ItemID文章的总like数目,也可用EF方法实现,比较麻烦
            // int x = db.likes.Count(like => like.ItemId ==101);

            /******************************
            *  访问rss的地址,读取xml数据
            ******************************/
            rss cars = null;
            //  System.Net.WebClient client = new WebClient();
            //  byte[] page = client.DownloadData("http://rss.nytimes.com/services/xml/rss/nyt/US.xml");
            // string path = System.Text.Encoding.UTF8.GetString(page);
            //string path = "cars.xml";

            XmlSerializer serializer = new XmlSerializer(typeof(rss));

            //HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create("http://rss.nytimes.com/services/xml/rss/nyt/US.xml");
            //使用Cookie设置AllowAutoRedirect属性为false,是解决“尝试自动重定向的次数太多。”的核心
            // request.CookieContainer = new CookieContainer();
            // request.AllowAutoRedirect = false;
            //  WebResponse response = (WebResponse)request.GetResponse();
            //  Stream sm = response.GetResponseStream();
            //  System.IO.StreamReader streamReader = new System.IO.StreamReader(sm);
            //将流转换为字符串
            // string html = streamReader.ReadToEnd();
            // streamReader.Close();

            //  TextReader reader = new StreamReader(path);

            XmlReader reader = new XmlTextReader("http://rss.nytimes.com/services/xml/rss/nyt/US.xml");

            cars = (rss)serializer.Deserialize(reader);

            /*var serializer = new XmlSerializer(typeof(rss));
             * using (TextReader reader = new StringReader(html))
             * {
             *    cars = (rss)serializer.Deserialize(reader);
             * }*/


            //  reader.Close();


            /******************************
            *  初始化最新时间newTime
            ******************************/
            DateTime newTime;

            if (db.sources.Find("NYTimes") == null)
            {
                //db.sources.Add(new Models.source("NYTimes", new DateTime(2000,1,1)));    //Initialize database
                db.sources.Add(new Models.source("NYTimes", DateTime.Parse("Sat, 07 Feb 2015 00:57:00 GMT")));
            }
            //db.sources.Add(new source { srcName = "NYTimes", newDate = DateTime.Parse("Sat, 07 Feb 2015 00:57:00 GMT") });
            //db.SaveChanges();

            if (db.sources.Find("NYTimes").newDate.Value < DateTime.Parse("Sat, 07 Feb 2015 00:57:00 GMT"))
            {
                string httpTime = cars.item[cars.item.Length - 1].pubDate;
                newTime = DateTime.Parse(httpTime);
                Models.source src = new Models.source("NYTimes", newTime);
                db.sources.Add(src);
            }
            else
            {
                Models.source src = db.sources.Find("NYTimes");
                newTime = src.newDate.Value;
            }


            /******************************
            *  循环添加每一条新闻条目,只添加新条目
            ******************************/

            for (var i = cars.item.Length - 1; i >= 0; i--)    //old item store into database first
            {
                string   httpTime = cars.item[i].pubDate;
                DateTime time     = DateTime.Parse(httpTime);


                // 每次添加新条目前,先与source里的最新时间对比
                if (time <= newTime)       //time值小于最新时间,舍弃
                {
                    continue;
                }
                else
                {
                    Models.source src = db.sources.Find("NYTimes");
                    src.newDate = time;    //更新时间
                }

                // description里面会带有<和> 之间的多余内容,例如广告,使用正则表达式可以消除掉
                cars.item[i].description = Regex.Replace(cars.item[i].description, "<.*?>", string.Empty);


                Models.item item = new Models.item(cars.item[i], time, "NYTimes", 0);

                db.items.Add(item);               //item include 4 elements

                db.SaveChanges();                 //save DB before calling other function !!


                /**********************************
                * 添加每篇文章同时对keyword表和artKey表进行统计
                * ********************************/
                KeywordAnalyzer ka = new KeywordAnalyzer();

                ka.analyze(item);                   //这里保存的数据库结果,不会传到view的ToList里?TFIDF

                ka.TFIDF(item.Id);                  //随着数据越多,TFIDF效果会越来越精确


                /*****************************
                * 把结果存进article表中。不能放在子函数,否则传不进View?
                *****************************/
                var query2 = db.artKeys
                             .Where(x => x.AId == item.Id)
                             .OrderByDescending(x => x.TFIDF)
                             .Take(3);                                  //获得排序最高的三个关键词
                string str = "";
                foreach (var line in query2)
                {
                    str = str + line.word + ",";
                }

                db.items.Find(item.Id).keyword = str;
                db.SaveChanges();
            }


            //db.SaveChanges();
            return(View(db.items.ToList()));
        }