/**
         * <summary>
         * Rebuilds the News table from three RSS feeds, runs the requested search
         * over every stored article and passes the matches to the view.
         * </summary>
         * <remarks>
         * Each call deletes all rows from News, reloads the feeds through
         * Loader.loadRSS inside one transaction, reads the table back and then
         * dispatches on searchType. The query, its regex form, the search type,
         * the result list and the result count are published through ViewBag.
         * </remarks>
         * <param name="searchQuery">Search pattern entered by the user.</param>
         * <param name="searchType">
         * Selects the search routine: 0 = searchKMP, 1 = searchBM, 2 = searchRegex,
         * 3 = showAll (debug listing of every article). Any other value yields an
         * empty result list.
         * </param>
         * <returns>The default view for this action.</returns>
         */
        public ActionResult Index(string searchQuery, int searchType)
        {
            DbConfiguration.SetConfiguration(new MySqlEFConfiguration());

            // NOTE(review): credentials are hard-coded in source; they should come
            // from configuration rather than be compiled into the assembly.
            String connectionString = "server=127.0.0.1;User Id=root;password=password;database=db_newscrawler";

            News[] newsArray;

            // The context is created with contextOwnsConnection = false, so the
            // connection has to be disposed here; disposing the context alone would
            // leave it open.
            using (MySqlConnection connection = new MySqlConnection(connectionString))
            using (NewsCrawlerDB dbNews = new NewsCrawlerDB(connection, false))
            {
                dbNews.Database.CreateIfNotExists();

                connection.Open();

                // Empty the table before reloading it.
                using (MySqlCommand cmd = new MySqlCommand("delete from News", connection))
                {
                    cmd.ExecuteNonQuery();
                }

                // Disposing an uncommitted transaction rolls it back, so a failed
                // feed load no longer leaves a half-written batch pending.
                using (MySqlTransaction transaction = connection.BeginTransaction())
                {
                    dbNews.Database.UseTransaction(transaction);

                    // Get data from RSS
                    Loader.loadRSS("http://rss.detik.com/index.php/detikcom", dbNews);
                    Loader.loadRSS("http://tempo.co/rss/terkini", dbNews);
                    Loader.loadRSS("http://rss.vivanews.com/get/all", dbNews);
                    transaction.Commit();

                    // Materialize the rows while the context and connection are alive.
                    // NOTE(review): the context is disposed before the view renders;
                    // if News ever gains lazy-loaded navigation properties they must
                    // be loaded here.
                    newsArray = dbNews.News.SqlQuery("select * from News").ToArray();
                }
            }

            List <NewsFound> newsFound = new List <NewsFound>();

            // Do Search
            switch (searchType)
            {
                case 0:
                    newsFound = searchKMP(newsArray, searchQuery);
                    break;

                case 1:
                    newsFound = searchBM(newsArray, searchQuery);
                    break;

                case 2:
                    newsFound = searchRegex(newsArray, searchQuery);
                    break;

                case 3:
                    // debug to show all news
                    newsFound = showAll(newsArray);
                    break;
            }

            ViewBag.regexQuery   = Searcher.regexConvert(searchQuery);
            ViewBag.searchQuery  = searchQuery;
            ViewBag.searchType   = searchType;
            ViewBag.searchResult = newsFound;
            ViewBag.searchCount  = newsFound.Count;

            return(View());
        }
Example #2
0
        /**
         * <summary>
         * Downloads the RSS feed at <paramref name="url"/>, fetches the page behind
         * every guid element, extracts the article markup and stores one News row
         * per guid.
         * </summary>
         * <remarks>
         * The first title element of the feed is taken as the site name and picks
         * the XPath used to locate the article body. An article whose page cannot
         * be downloaded is still stored, with null content. All rows are added to
         * the context and saved in a single SaveChanges call.
         * </remarks>
         * <param name="url">Address of the RSS document.</param>
         * <param name="db">Context that receives the new News rows.</param>
         */
        public static void loadRSS(string url, NewsCrawlerDB db)
        {
            //Create the XmlDocument.
            XmlDocument doc = new XmlDocument();

            doc.Load(url);

            XmlNodeList linkList  = doc.GetElementsByTagName("guid");
            XmlNodeList titleList = doc.GetElementsByTagName("title");
            XmlNodeList dateList  = doc.GetElementsByTagName("pubDate");

            // The feed may carry extra title/pubDate elements outside the articles
            // (presumably channel-level ones that come first), so article i is read
            // at i + offset. Clamped at zero so a short list never yields a
            // negative index.
            int titleoff = Math.Max(0, titleList.Count - linkList.Count);
            int dateoff  = Math.Max(0, dateList.Count - linkList.Count);

            // ToString() on an XmlNode yields the type name, so the site name has to
            // be read from InnerText for the comparisons below to ever match.
            string siteName = titleList.Count > 0 ? titleList[0].InnerText.Trim() : string.Empty;
            string distinct = "//p";

            if (siteName == "news.detik")
            {
                distinct = "//div[@class='detail_text'][@id='detikdetailtext']";
            }
            else if (siteName == "Tempo.co News Site")
            {
                distinct = "//p";
            }
            else if (siteName == "VIVA.co.id")
            {
                distinct = "//span[@itemprop='description']";
            }

            List <News> news = new List <News>();

            // One client serves every sequential download and is released afterwards.
            using (WebClient client = new WebClient())
            {
                for (int i = 0; i < linkList.Count; i++)
                {
                    string htmlText = null;

                    try
                    {
                        htmlText = client.DownloadString(linkList[i].InnerText);
                    }
                    catch (WebException)
                    {
                        // A dead or malformed link must not abort the whole feed;
                        // the article is stored without content instead.
                        htmlText = null;
                    }

                    string content = null;
                    if (htmlText != null)
                    {
                        content = extractArticleHtml(htmlText, distinct);
                    }

                    news.Add(new News {
                        url     = linkList[i].InnerXml,
                        title   = innerXmlAt(titleList, i + titleoff),
                        content = content,
                        date    = innerXmlAt(dateList, i + dateoff)
                    });
                }
            }

            db.News.AddRange(news);
            db.SaveChanges();
        }

        // Concatenates the trimmed outer HTML of every node matching xpath, one per line.
        private static string extractArticleHtml(string htmlText, string xpath)
        {
            HtmlDocument htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(htmlText);

            var           nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
            StringBuilder sb    = new StringBuilder();

            if (nodes != null)
            {
                foreach (var item in nodes)
                {
                    string text = item.OuterHtml;
                    if (!string.IsNullOrEmpty(text))
                    {
                        sb.AppendLine(text.Trim());
                    }
                }
            }
            return sb.ToString();
        }

        // Returns the InnerXml of list[index], or null when index is outside the list.
        private static string innerXmlAt(XmlNodeList list, int index)
        {
            if (index < 0 || index >= list.Count)
            {
                return null;
            }

            XmlNode node = list[index];
            return node == null ? null : node.InnerXml;
        }