/// <summary>
/// Builds the diemthi.tuyensinh247.com "diem-chuan" URL for one college and year,
/// hands it to <c>ProcessCrawling</c>, and writes a completion line to the console.
/// </summary>
/// <param name="college">College whose <c>code</c> and <c>name</c> are used to build the URL.</param>
/// <param name="year">Year appended to the URL as the <c>y</c> query parameter.</param>
/// <param name="db">Database context passed through unchanged to <c>ProcessCrawling</c>.</param>
private void Process(CollegeEntity college, int year, EntranceScoresContext db)
{
    var code = college.code;
    var name = college.name;

    // The slug and code end up in a URL, i.e. machine-readable text, so casing must not
    // depend on the current thread culture (e.g. Turkish dotted/dotless I rules).
    // convertToUnSign3 is defined elsewhere; presumably it strips diacritics - confirm.
    var slug = convertToUnSign3(name.ToLowerInvariant()).Replace(" ", "-");
    var pagePath = slug + "-" + code.ToUpperInvariant() + ".html?y=" + year;
    var urlCrawl = "https://diemthi.tuyensinh247.com/diem-chuan/" + pagePath;

    ProcessCrawling(urlCrawl, code, year, db);

    // Logged once the ProcessCrawling call has returned.
    Console.WriteLine(year + "-" + name + " done!");
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, reads the rows with class <c>bg_white</c>
/// from the score table, and for each row either records an unknown major in
/// <c>newMajorMap</c> or pushes a <c>MajorCollege</c> entry onto <c>majorColleges</c>.
/// </summary>
/// <remarks>
/// <para>
/// <c>isAddNewMajor</c>, <c>newMajorMap</c> and <c>majorColleges</c> are members declared
/// outside this method. Once <c>isAddNewMajor</c> is true, rows for known majors are no
/// longer pushed; only further unknown majors are recorded.
/// </para>
/// <para>
/// The method is intentionally not <c>async</c>: the body never awaits, and <c>async void</c>
/// would route any escaping exception (for example from <c>HtmlWeb.Load</c>) away from the
/// caller. Exceptions thrown by the download are not caught here.
/// </para>
/// </remarks>
/// <param name="url">Address of the page to download and parse.</param>
/// <param name="collegeCode">Code used to look up the college in <c>db.collegeEntities</c>.</param>
/// <param name="year">Year stored on every <c>MajorCollege</c> entry that is pushed.</param>
/// <param name="db">Context providing <c>collegeEntities</c> and <c>majorEntities</c>.</param>
private void ProcessCrawling(string url, string collegeCode, int year, EntranceScoresContext db)
{
    HtmlAgilityPack.HtmlWeb webSite = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument document = webSite.Load(url);

    var tabsNode = document.DocumentNode.SelectSingleNode("//div[@class='tabs']");
    if (tabsNode == null)
    {
        System.Console.WriteLine("No div with class 'tabs' found at " + url);
        return;
    }

    // NOTE(review): "//table" is an absolute XPath, so it is evaluated from the document
    // root rather than from the tabs div. Kept as-is because the page markup is not
    // visible here; ".//table" may be what was intended - confirm against the live page.
    var tableNode = tabsNode.SelectSingleNode("//table");
    if (tableNode == null)
    {
        System.Console.WriteLine("No table found at " + url);
        return;
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var rows = tableNode.SelectNodes("tr[@class='bg_white']");
    if (rows == null)
    {
        System.Console.WriteLine("No score rows found at " + url);
        return;
    }

    // The college does not depend on the row, so look it up once instead of per row.
    // SingleOrDefault yields null when nothing matches and throws
    // InvalidOperationException only when more than one entity matches.
    CollegeEntity collegeEntity;
    try
    {
        collegeEntity = db.collegeEntities.SingleOrDefault(c => c.code.Equals(collegeCode));
    }
    catch (InvalidOperationException e)
    {
        System.Console.WriteLine(e.Message);
        return;
    }

    if (collegeEntity == null)
    {
        System.Console.WriteLine("Unknown college code " + collegeCode + ", skipping " + url);
        return;
    }

    foreach (HtmlNode row in rows)
    {
        // Cells used below: [1] major code, [2] major name, [3] group code, [4] score.
        var cells = row.SelectNodes("td");
        if (cells == null || cells.Count < 5)
        {
            System.Console.WriteLine("Skipping row with fewer than 5 cells at " + url);
            continue;
        }

        string majorCode = cells[1].InnerText;
        if (string.IsNullOrEmpty(majorCode))
        {
            continue;
        }

        MajorEntity majorEntity;
        try
        {
            majorEntity = db.majorEntities.SingleOrDefault(m => m.code.Equals(majorCode));
        }
        catch (InvalidOperationException e)
        {
            // More than one major shares this code; report it and move to the next row.
            System.Console.WriteLine(e.Message);
            continue;
        }

        if (majorEntity == null)
        {
            // Unknown major: remember its code and name. The indexer adds the key or
            // overwrites an existing one, so no Add/catch round trip is needed.
            isAddNewMajor = true;
            newMajorMap[majorCode] = cells[2].InnerText;
            continue;
        }

        if (isAddNewMajor)
        {
            continue;
        }

        // NOTE(review): parsed with the current culture, exactly like the previous
        // double.Parse call. If the site always uses '.' as the decimal separator,
        // CultureInfo.InvariantCulture would be safer - confirm the page format.
        string scoreText = cells[4].InnerText;
        double score;
        if (!double.TryParse(scoreText, out score))
        {
            System.Console.WriteLine("Invalid score '" + scoreText + "' for major " + majorCode);
            continue;
        }

        MajorCollege majorCollege = new MajorCollege();
        majorCollege.MajorEntity = majorEntity;
        majorCollege.CollegeEntity = collegeEntity;
        majorCollege.groupCode = cells[3].InnerText;
        majorCollege.year = year;
        majorCollege.score = score;
        majorColleges.Push(majorCollege);
    }
}