/// <summary>
/// Downloads the page at <paramref name="url"/>, finds every article module in the markup
/// (td_module_10, td_module_mx2 or td_module_mx8) and appends one <c>New</c> entry per module
/// to <c>ListViewNews</c>. Afterwards the double-click handler for list items is (re)registered.
/// </summary>
/// <param name="url">Address of the page to download and parse.</param>
public void Crawl_Tech_Data(string url)
{
    // Treat a null download result as an empty page so the regex calls below cannot throw on null.
    string html = Crawl.Instance.CrawlDataFromUrl(url, httpclient) ?? string.Empty;

    MatchCollection modules = Regex.Matches(
        html,
        @"<div class=""(td_module_10|td_module_mx2|td_module_mx8) td_module_wrap td-animation-stack""(.*?)</div>(.*?)</div>(.*?)</div>",
        RegexOptions.Singleline);

    foreach (Match module in modules)
    {
        string moduleHtml = module.Value;
        New congnghe = new New();

        // The thumbnail block carries the link, image and title as attributes.
        string thumbHtml = Regex.Match(
            moduleHtml,
            @"<div class=""td-module-thumb"">(.*?)</div>",
            RegexOptions.Singleline).Value;

        // Get the link. Group 1 is the attribute value without the surrounding quotes;
        // it is an empty string when the attribute is not found.
        congnghe.Link = Regex.Match(thumbHtml, @"href=""(.*?)""", RegexOptions.Singleline).Groups[1].Value;

        // Get the image.
        congnghe.Image = Regex.Match(thumbHtml, @"src=""(.*?)""", RegexOptions.Singleline).Groups[1].Value;

        // Get the title.
        congnghe.Title = Regex.Match(thumbHtml, @"title=""(.*?)""", RegexOptions.Singleline).Groups[1].Value;

        // Get the time: group 2 is the text between the opening <time ...> tag and </time>.
        congnghe.Time = Regex.Match(
            moduleHtml,
            @"<time class=""entry-date updated td-module-date""(.*?)>(.*?)</time>",
            RegexOptions.Singleline).Groups[2].Value;

        // Get the content (excerpt). Any stray closing tag inside the capture is stripped
        // and surrounding whitespace is trimmed.
        congnghe.Content = Regex.Match(
            moduleHtml,
            @"<div class=""td-excerpt"">(.*?)\s</div>",
            RegexOptions.Singleline).Groups[1].Value.Replace("</div>", "").Trim();

        // The list view is only touched through the dispatcher.
        this.Dispatcher.Invoke(() => { ListViewNews.Items.Add(congnghe); });
    }

    // Register the double-click handler exactly once: removing an equal delegate first keeps
    // repeated crawls from stacking duplicate handlers (which would fire once per registration).
    // Done through the dispatcher, like the item insertion above, so all access to the control
    // happens on its own thread.
    this.Dispatcher.Invoke(() =>
    {
        MouseButtonEventHandler handler = new MouseButtonEventHandler(item_MouseDoubleClick);
        ListViewNews.RemoveHandler(ListViewItem.MouseDoubleClickEvent, handler);
        ListViewNews.AddHandler(ListViewItem.MouseDoubleClickEvent, handler);
    });
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, finds every "clearfix box-top-level" job block
/// in the markup and appends one <c>Job</c> entry per block to <c>ListViewNews</c>.
/// Afterwards the double-click handler for list items is (re)registered.
/// </summary>
/// <param name="url">Address of the page to download and parse.</param>
public void Crawl_Job_Data(string url)
{
    // Treat a null download result as an empty page so the regex calls below cannot throw on null.
    string html = Crawl.Instance.CrawlDataFromUrl(url, httpclient) ?? string.Empty;

    MatchCollection postings = Regex.Matches(
        html,
        @"<div class=""clearfix box-top-level""(.*?)</div>(.*?)</div>(.*?)</div>(.*?)</div>(.*?)</div>(.*?)</div>",
        RegexOptions.Singleline);

    foreach (Match posting in postings)
    {
        string postingHtml = posting.Value;
        Job jobs = new Job();

        // Get the image from the logo block. Group 1 is the attribute value without quotes;
        // it is an empty string when the attribute is not found.
        string logoHtml = Regex.Match(
            postingHtml,
            @"<div class=""logo"">(.*?)</div>",
            RegexOptions.Singleline).Value;
        jobs.Image = Regex.Match(logoHtml, @"src=""(.*?)""", RegexOptions.Singleline).Groups[1].Value;

        // Get the link and title from the info block.
        string infoHtml = Regex.Match(
            postingHtml,
            @"<div class=""job-item-info relative"">(.*?)</div>",
            RegexOptions.Singleline).Value;
        jobs.Link = Regex.Match(infoHtml, @"href=""(.*?)""", RegexOptions.Singleline).Groups[1].Value;

        // Title: everything after the first '>' of the first anchor, minus the closing tag.
        // IndexOf returns -1 when there is no anchor, so Substring(0) safely yields "".
        string anchorHtml = Regex.Match(infoHtml, @"<a (.*?)</a>", RegexOptions.Singleline).Value;
        jobs.Title = anchorHtml.Substring(anchorHtml.IndexOf('>') + 1).Replace("</a>", "").Trim();

        // Get the company (stored in Content); double quotes are stripped from the text.
        jobs.Content = Regex.Match(
            infoHtml,
            @"<div class=""company"">(.*?)</div>",
            RegexOptions.Singleline).Groups[1].Value.Replace(@"""", "");

        // Get the location: the first <span> inside the location block, quotes stripped.
        string locationHtml = Regex.Match(
            postingHtml,
            @"<div class=""extra-info location text-clip"">(.*?)</div>",
            RegexOptions.Singleline).Value;
        jobs.Location = Regex.Match(
            locationHtml,
            @"<span>(.*?)</span>",
            RegexOptions.Singleline).Groups[1].Value.Replace(@"""", "");

        // Get the salary: strip the anchor markup and quotes, then keep what follows the first '>'.
        string salaryHtml = Regex.Match(
            postingHtml,
            @"<div class=""extra-info salary"">(.*?)</div>",
            RegexOptions.Singleline).Value;
        string salaryAnchor = Regex.Match(salaryHtml, @" <a (.*?)</a>", RegexOptions.Singleline).Value
            .Replace(@"<a ", "")
            .Replace(@"""", "")
            .Replace("</a>", "");
        jobs.Money = salaryAnchor.Substring(salaryAnchor.IndexOf('>') + 1);

        // The list view is only touched through the dispatcher.
        this.Dispatcher.Invoke(() => { ListViewNews.Items.Add(jobs); });
    }

    // Register the double-click handler exactly once, after the loop. Registering it per item
    // attached one handler per job, so a single double-click fired the handler many times.
    // Removing an equal delegate first also keeps repeated crawls from stacking duplicates.
    // Done through the dispatcher, like the item insertion above, so all access to the control
    // happens on its own thread.
    this.Dispatcher.Invoke(() =>
    {
        MouseButtonEventHandler handler = new MouseButtonEventHandler(item_MouseDoubleClick);
        ListViewNews.RemoveHandler(ListViewItem.MouseDoubleClickEvent, handler);
        ListViewNews.AddHandler(ListViewItem.MouseDoubleClickEvent, handler);
    });
}