Exemplo n.º 1
0
        /// <summary>
        /// Builds the diemthi.tuyensinh247.com score-page URL for one college and year,
        /// hands it to <c>ProcessCrawling</c>, then writes a completion line to the console.
        /// </summary>
        /// <param name="college">College whose <c>code</c> and <c>name</c> form the URL slug.</param>
        /// <param name="year">Year appended as the <c>y</c> query parameter and forwarded to the crawler.</param>
        /// <param name="db">Context forwarded unchanged to <c>ProcessCrawling</c>.</param>
        private void Process(CollegeEntity college, int year, EntranceScoresContext db)
        {
            var code        = college.code;
            var name        = college.name;

            // Invariant casing: the slug is a URL component, so it must not depend on the
            // machine's current culture (e.g. Turkish "I" -> dotless "i" under tr-TR).
            // convertToUnSign3 is defined elsewhere; presumably it strips diacritics - confirm.
            var nameUnSign3 = convertToUnSign3(name.ToLowerInvariant());
            var kq          = nameUnSign3.Replace(" ", "-") + "-" + code.ToUpperInvariant() + ".html?y=" + year;
            var urlCrawl    = "https://diemthi.tuyensinh247.com/diem-chuan/" + kq;

            ProcessCrawling(urlCrawl, code, year, db);

            // Printed after the crawl call returns, whether or not any rows were found.
            Console.WriteLine(year + "-" + name + " done!");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads the page at <paramref name="url"/>, walks the <c>bg_white</c> rows of its
        /// score table and, for each row with a major code, either queues a
        /// <c>MajorCollege</c> score record on <c>majorColleges</c> or registers the major in
        /// <c>newMajorMap</c> when it is not yet present in <c>db.majorEntities</c>.
        /// </summary>
        /// <remarks>
        /// Once an unknown major has been registered, <c>isAddNewMajor</c> is set and no
        /// further score records are queued by this method. NOTE(review): presumably the
        /// caller inserts the new majors and crawls again - confirm against the caller.
        /// The method is synchronous; failures while loading the page propagate to the caller.
        /// </remarks>
        /// <param name="url">Address of the page to download and parse.</param>
        /// <param name="collegeCode">Code used to find the college in <c>db.collegeEntities</c>.</param>
        /// <param name="year">Year stored on every queued <c>MajorCollege</c>.</param>
        /// <param name="db">Context queried for the college and for each major code.</param>
        private void ProcessCrawling(string url, string collegeCode, int year, EntranceScoresContext db)
        {
            HtmlAgilityPack.HtmlWeb      webSite  = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument document = webSite.Load(url);

            var tabsNode = document.DocumentNode.SelectSingleNode("//div[@class='tabs']");

            // NOTE(review): "//table" is an absolute XPath, so it matches the first table of
            // the whole document rather than one inside the tabs div. Kept as-is because the
            // crawler may rely on it; ".//table" would scope the search to the div.
            var tableNode = tabsNode == null ? null : tabsNode.SelectSingleNode("//table");

            // SelectSingleNode / SelectNodes return null (not an empty result) when nothing matches.
            var rows = tableNode == null ? null : tableNode.SelectNodes("tr[@class='bg_white']");
            if (rows == null)
            {
                System.Console.WriteLine("No score rows found at " + url);
                return;
            }

            // The college is the same for every row, so resolve it once up front.
            CollegeEntity collegeEntity;
            try
            {
                collegeEntity = db.collegeEntities.SingleOrDefault(c => c.code.Equals(collegeCode));
            }
            catch (InvalidOperationException e)
            {
                // Thrown by SingleOrDefault when more than one college has this code.
                System.Console.WriteLine(e.Message);
                return;
            }

            if (collegeEntity == null)
            {
                return;
            }

            foreach (HtmlNode row in rows)
            {
                // Cells read below: [1] major code, [2] major name, [3] group code, [4] score.
                var cells = row.SelectNodes("td");
                if (cells == null || cells.Count < 3)
                {
                    continue;
                }

                string majorCode = cells[1].InnerText;
                if (majorCode == "")
                {
                    continue;
                }

                try
                {
                    MajorEntity majorEntity = db.majorEntities.SingleOrDefault(c => c.code.Equals(majorCode));

                    if (majorEntity == null)
                    {
                        // Unknown major: remember code -> name (last name seen wins).
                        isAddNewMajor = true;
                        newMajorMap[majorCode] = cells[2].InnerText;
                        continue;
                    }

                    if (isAddNewMajor == false && cells.Count > 4)
                    {
                        MajorCollege majorCollege = new MajorCollege();
                        majorCollege.MajorEntity   = majorEntity;
                        majorCollege.CollegeEntity = collegeEntity;
                        majorCollege.groupCode     = cells[3].InnerText;
                        majorCollege.year          = year;
                        // NOTE(review): parses with the current culture; if the site always uses
                        // '.' as the decimal separator this should use InvariantCulture - confirm.
                        majorCollege.score         = double.Parse(cells[4].InnerText);

                        majorColleges.Push(majorCollege);
                    }
                }
                catch (InvalidOperationException e)
                {
                    // Thrown by SingleOrDefault when several majors share the same code.
                    System.Console.WriteLine(e.Message);
                }
                catch (FormatException e)
                {
                    // Score cell is not a number; skip this row and keep going.
                    System.Console.WriteLine(e.Message);
                }
            }
        }