public pspPrintHistory(string url) { this.URL = url; var mainContent = Scraper.GetMainContentDivOnURL(url); try { var h1 = mainContent.SelectNodes(".//h1").First(); // there is just one in main-content up at the top var relatedPrintsListLink = h1.SelectSingleNode(".//a[@href]"); //in the title var docTypeDiv = mainContent.SelectSingleNode(".//div[@class='section-content simple']"); var typeText = HttpUtility.HtmlDecode(docTypeDiv.InnerText); foreach (var atype in typesMapper) { if (typeText.Contains(atype.Key, StringComparison.OrdinalIgnoreCase)) //case insensitive search { type = atype.Value; break; } } relatedPrintsListURL = Scraper.pspHostAppURL + relatedPrintsListLink.Attributes["href"].Value; var printsListHTMLDiv = Scraper.GetMainContentDivOnURL(relatedPrintsListURL); relatedPrintsURLs = printsListHTMLDiv.SelectNodes(".//a[@href]").Where(link => link.Attributes["href"].Value.Contains("tiskt.sqw")).Select(link => link.Attributes["href"].Value).ToList(); var headingText = HttpUtility.HtmlDecode(h1.InnerText); var dividedTitle = ScraperStringHelper.SplitByString(headingText, relatedPrintsListLink.InnerText); title = dividedTitle.ElementAt(1); var scrapedNumbers = ScraperStringHelper.GetNumbersFromString(relatedPrintsListLink.InnerText); number = scrapedNumbers.First().Value; var links = mainContent.SelectNodes(".//a"); var pspVotingsURLs = links.Where(link => link.Attributes["href"].Value.Contains("hlasy.sqw")).Select(x => x.Attributes["href"].Value).ToList(); relatedpspVotings = new List <pspVoting>(); foreach (var votingLink in pspVotingsURLs) { var voting = new pspVoting(Scraper.pspHostAppURL + votingLink); relatedpspVotings.Add(voting); } var meetingScheduleLinks = links.Where(link => link.Attributes["href"].Value.Contains("ischuze.sqw")).ToList(); //this should always return one element or null if (meetingScheduleLinks.Count != 0) { //implement TryGetDate from meeting schedule var agendaLink = meetingScheduleLinks.First().Attributes["href"].Value; inAgenda = new pspMeetingAgenda(Scraper.pspHostAppURL + agendaLink); } scrapedDate = DateTime.Now; Console.WriteLine("Finished scraping pspPrintHistory"); // date } catch (Exception) { throw; } }
public pspPrintHistory(string url) { this.URL = url; var mainContent = Scraper.GetMainContentDivOnURL(url); try { var h1 = mainContent.SelectNodes(".//h1").First(); // there is just one in main-content up at the top var relatedPrintsListLink = h1.SelectSingleNode(".//a[@href]"); //in the title var docTypeDiv = mainContent.SelectSingleNode(".//div[@class='section-content simple']"); var typeText = HttpUtility.HtmlDecode(docTypeDiv.InnerText); foreach (var atype in typesMapper) { if (typeText.Contains(atype.Key, StringComparison.OrdinalIgnoreCase)) //case insensitive search { type = atype.Value; break; } } relatedPrintsListURL = Scraper.pspHostAppURL + relatedPrintsListLink.Attributes["href"].Value; var printsListHTMLDiv = Scraper.GetMainContentDivOnURL(relatedPrintsListURL); relatedPrintsURLs = printsListHTMLDiv.SelectNodes(".//a[@href]").Where(link => link.Attributes["href"].Value.Contains("tiskt.sqw")).Select(link => link.Attributes["href"].Value).ToList(); var headingText = HttpUtility.HtmlDecode(h1.InnerText); var dividedTitle = ScraperStringHelper.SplitByString(headingText, relatedPrintsListLink.InnerText); title = dividedTitle.ElementAt(1); var scrapedNumbers = ScraperStringHelper.GetNumbersFromString(relatedPrintsListLink.InnerText); number = scrapedNumbers.First().Value; var links = mainContent.SelectNodes(".//a"); var pspVotingsURLs = links.Where(link => link.Attributes["href"].Value.Contains("hlasy.sqw")).Select(x=>x.Attributes["href"].Value).ToList(); relatedpspVotings = new List<pspVoting>(); foreach (var votingLink in pspVotingsURLs) { var voting = new pspVoting(Scraper.pspHostAppURL + votingLink); relatedpspVotings.Add(voting); } var meetingScheduleLinks = links.Where(link => link.Attributes["href"].Value.Contains("ischuze.sqw")).ToList(); //this should always return one element or null if (meetingScheduleLinks.Count != 0) { //implement TryGetDate from meeting schedule var agendaLink = meetingScheduleLinks.First().Attributes["href"].Value; inAgenda = new pspMeetingAgenda(Scraper.pspHostAppURL + agendaLink); } scrapedDate = DateTime.Now; Console.WriteLine("Finished scraping pspPrintHistory"); // date } catch (Exception) { throw; } }