/// <summary>
/// Downloads the page at <paramref name="pageRefer"/> and passes every speciality link found
/// on it to <c>ParseSpecialityDetails</c>, together with the region name read from the page.
/// </summary>
/// <remarks>
/// <c>currentUniversity</c> is replaced with a fresh <c>UniversityData</c> before the download
/// starts, so it is already reset if the request or any later step throws.
/// </remarks>
/// <param name="pageRefer">URL of the page to download.</param>
private void ParseUniverPage(string pageRefer)
{
    currentUniversity = new UniversityData();

    string text;
    HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(pageRefer);

    // Dispose the response as well as the reader: an undisposed HttpWebResponse keeps its
    // connection checked out, and this method is called once per page in a long loop.
    using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
    using (StreamReader stream = new StreamReader(resp.GetResponseStream(), Encoding.UTF8))
    {
        text = stream.ReadToEnd();
    }

    // Group.Value is an empty string when the pattern does not match, so regionName is never null.
    Match regionMatch = new Regex(regionPattern).Match(text);
    string regionName = regionMatch.Groups[1].Value;

    Regex bachelorRegex = new Regex(bachelorPattern);

    // The link pattern does not change between iterations; build it once.
    Regex linkRegex = new Regex(linkPattern);

    foreach (Match match in bachelorRegex.Matches(text))
    {
        // Search for the link only inside the fragment matched by bachelorPattern.
        string fragment = match.Groups[0].Value;
        Match found = linkRegex.Match(fragment);

        // Capture group 1 of linkPattern is appended to mainRefer to form the details URL.
        ParseSpecialityDetails(mainRefer + found.Groups[1].Value, regionName);
    }
}
/// <summary>
/// Parses pages by id up to and including <c>endPageId</c> and collects the universities found.
/// </summary>
/// <remarks>
/// When <c>FileExist()</c> returns true, <c>ReadFromBinary()</c> is called first. The walk starts
/// at <c>startPageId</c> if <c>universities</c> is empty, otherwise at <c>universities.Count + 1</c>.
/// A page that throws is reported on standard error and skipped; it does not stop the run.
/// </remarks>
public void StartParse()
{
    if (FileExist())
    {
        ReadFromBinary();
    }

    // NOTE(review): resuming at universities.Count + 1 presumably assumes every earlier id
    // produced exactly one stored university - confirm against how pages get skipped.
    for (var id = universities.Count == 0 ? startPageId : universities.Count + 1; id <= endPageId; id++)
    {
        try
        {
            ParseUniverPage(refer + id);

            // Keep the page only if it produced a name and the IsUniversity flag is still set.
            if (currentUniversity.UniversityName != null && IsUniversity)
            {
                universities.Add(currentUniversity);
                currentUniversity = null;
            }

            // Periodic checkpoint.
            // NOTE(review): the message says "id%10" but the check is id % 5 - confirm which is intended.
            if (id % 5 == 0)
            {
                Console.WriteLine("id%10==0");
                SaveToBinary();
            }

            Console.WriteLine("Ссылка: " + refer + id);
        }
        catch (Exception ex)
        {
            // Best effort: one bad page must not abort the run, but the failure must be visible.
            Console.Error.WriteLine("Failed to process " + refer + id + ": " + ex);
        }
        finally
        {
            // Re-arm the flag for the next page even when this one threw; otherwise a stale
            // false value would cause the next page's result to be discarded.
            if (!IsUniversity)
            {
                IsUniversity = true;
            }
        }
    }

    // NOTE(review): the final save calls below are disabled, so within this method results are
    // only persisted at ids divisible by 5.
    //SaveToXml();
    //SaveToBinary();
}
/// <summary>
/// Creates a manager with an empty university list, no university currently being parsed,
/// and the <c>IsUniversity</c> flag set to true.
/// </summary>
public ParseManager()
{
    currentUniversity = null;
    universities = new List<UniversityData>();
    IsUniversity = true;
}
/// <summary>
/// Initializes the parser state: <c>universities</c> starts as an empty list,
/// <c>currentUniversity</c> is null, and <c>IsUniversity</c> is true.
/// </summary>
public ParseManager()
{
    universities = new List<UniversityData>();
    currentUniversity = null;
    IsUniversity = true;
}
/// <summary>
/// Parses pages by id up to and including <c>endPageId</c> and collects the universities found.
/// </summary>
/// <remarks>
/// When <c>FileExist()</c> returns true, <c>ReadFromBinary()</c> is called first. The walk starts
/// at <c>startPageId</c> if <c>universities</c> is empty, otherwise at <c>universities.Count + 1</c>.
/// A page that throws is reported on standard error and skipped; it does not stop the run.
/// </remarks>
public void StartParse()
{
    if (FileExist())
    {
        ReadFromBinary();
    }

    // NOTE(review): resuming at universities.Count + 1 presumably assumes every earlier id
    // produced exactly one stored university - confirm against how pages get skipped.
    for (var id = universities.Count == 0 ? startPageId : universities.Count + 1; id <= endPageId; id++)
    {
        try
        {
            ParseUniverPage(refer + id);

            // Keep the page only if it produced a name and the IsUniversity flag is still set.
            if (currentUniversity.UniversityName != null && IsUniversity)
            {
                universities.Add(currentUniversity);
                currentUniversity = null;
            }

            // Periodic checkpoint.
            // NOTE(review): the message says "id%10" but the check is id % 5 - confirm which is intended.
            if (id % 5 == 0)
            {
                Console.WriteLine("id%10==0");
                SaveToBinary();
            }

            Console.WriteLine("Ссылка: " + refer + id);
        }
        catch (Exception ex)
        {
            // Best effort: one bad page must not abort the run, but the failure must be visible.
            Console.Error.WriteLine("Failed to process " + refer + id + ": " + ex);
        }
        finally
        {
            // Re-arm the flag for the next page even when this one threw; otherwise a stale
            // false value would cause the next page's result to be discarded.
            if (!IsUniversity)
            {
                IsUniversity = true;
            }
        }
    }

    // NOTE(review): the final save calls below are disabled, so within this method results are
    // only persisted at ids divisible by 5.
    //SaveToXml();
    //SaveToBinary();
}