/// <summary>
/// Rebuilds the News table from three RSS feeds, runs the selected search over
/// every stored news item, and hands the results to the view through ViewBag.
/// </summary>
/// <param name="searchQuery">Search pattern supplied by the user.</param>
/// <param name="searchType">
/// Search selector: 0 calls searchKMP, 1 calls searchBM, 2 calls searchRegex,
/// 3 calls showAll (debug: list everything). Any other value leaves the result list empty.
/// </param>
/// <returns>The default view for this action.</returns>
public ActionResult Index(string searchQuery, int searchType)
{
    DbConfiguration.SetConfiguration(new MySqlEFConfiguration());

    // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
    String connectionString = "server=127.0.0.1;User Id=root;password=password;database=db_newscrawler";

    News[] newsArray;

    // The context is created with contextOwnsConnection = false, so the connection
    // must be disposed separately. The using blocks also guarantee cleanup (and an
    // uncommitted transaction being discarded) when an RSS load throws.
    using (MySqlConnection connection = new MySqlConnection(connectionString))
    using (NewsCrawlerDB dbNews = new NewsCrawlerDB(connection, false))
    {
        dbNews.Database.CreateIfNotExists();
        connection.Open();

        // Empty the table before reloading. This runs outside the transaction below,
        // exactly as before.
        using (MySqlCommand cmd = new MySqlCommand("delete from News", connection))
        {
            cmd.ExecuteNonQuery();
        }

        using (MySqlTransaction transaction = connection.BeginTransaction())
        {
            dbNews.Database.UseTransaction(transaction);

            // Get data from RSS
            Loader.loadRSS("http://rss.detik.com/index.php/detikcom", dbNews);
            Loader.loadRSS("http://tempo.co/rss/terkini", dbNews);
            Loader.loadRSS("http://rss.vivanews.com/get/all", dbNews);

            transaction.Commit();

            // Materialize the rows into an array while the context is still alive.
            newsArray = dbNews.News.SqlQuery("select * from News").ToArray();
        }
    }

    List<NewsFound> newsFound = new List<NewsFound>();

    // Do search
    switch (searchType)
    {
        case 0:
            newsFound = searchKMP(newsArray, searchQuery);
            break;
        case 1:
            newsFound = searchBM(newsArray, searchQuery);
            break;
        case 2:
            newsFound = searchRegex(newsArray, searchQuery);
            break;
        case 3:
            // Debug mode: show all news.
            newsFound = showAll(newsArray);
            break;
    }

    ViewBag.regexQuery = Searcher.regexConvert(searchQuery);
    ViewBag.searchQuery = searchQuery;
    ViewBag.searchType = searchType;
    ViewBag.searchResult = newsFound;
    ViewBag.searchCount = newsFound.Count;

    return View();
}
/// <summary>
/// Loads the RSS document at <paramref name="url"/>, downloads the page referenced by
/// each guid element, extracts the article markup from it, and saves one News
/// entity per guid through <paramref name="db"/>.
/// </summary>
/// <param name="url">Address of the RSS feed to load.</param>
/// <param name="db">Context that receives the new News entities; SaveChanges is called on it.</param>
public static void loadRSS(string url, NewsCrawlerDB db)
{
    XmlDocument doc = new XmlDocument();
    doc.Load(url);

    XmlNodeList linkList = doc.GetElementsByTagName("guid");
    XmlNodeList titleList = doc.GetElementsByTagName("title");
    XmlNodeList dateList = doc.GetElementsByTagName("pubDate");

    // The document can hold more title elements than guid elements; the surplus
    // leading titles are skipped so titles line up with guids.
    // NOTE(review): presumably the extras are the channel/image titles - confirm per feed.
    int titleoff = titleList.Count - linkList.Count;

    // Pick the XPath for the article body from the first title in the document.
    // The text must be read with InnerText: ToString() on an XML node yields its
    // type name, so the site-specific selectors could never be chosen before.
    string feedTitle = titleList.Count > 0 ? titleList[0].InnerText.Trim() : string.Empty;
    string distinct;
    switch (feedTitle)
    {
        case "news.detik":
            distinct = "//div[@class='detail_text'][@id='detikdetailtext']";
            break;
        case "VIVA.co.id":
            distinct = "//span[@itemprop='description']";
            break;
        default:
            // Also used for "Tempo.co News Site".
            distinct = "//p";
            break;
    }

    List<News> news = new List<News>(linkList.Count);

    // One client serves every sequential download and is disposed afterwards.
    using (WebClient client = new WebClient())
    {
        for (int i = 0; i < linkList.Count; i++)
        {
            string htmlText = client.DownloadString(linkList[i].InnerText);

            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(htmlText);

            // SelectNodes is checked for null: no match leaves the content empty.
            var nodes = htmlDoc.DocumentNode.SelectNodes(distinct);
            StringBuilder sb = new StringBuilder();
            if (nodes != null)
            {
                foreach (var item in nodes)
                {
                    string text = item.OuterHtml;
                    if (!string.IsNullOrEmpty(text))
                    {
                        sb.AppendLine(text.Trim());
                    }
                }
            }

            news.Add(new News
            {
                url = linkList[i].InnerXml,
                title = titleList[i + titleoff].InnerXml,
                content = sb.ToString(),
                date = dateList[i].InnerXml
            });
        }
    }

    // Nothing is attached to the context until here, so a single save is enough.
    db.News.AddRange(news);
    db.SaveChanges();
}