/// <summary>
/// POSTs the form fields of <paramref name="formSearchSos"/> to <paramref name="uri"/>
/// and returns the response body as text.
/// </summary>
/// <param name="uri">Address the request is sent to.</param>
/// <param name="formSearchSos">
/// Supplies the form fields; the result of its <c>FormDataList()</c> is URL-encoded
/// and used as the request body.
/// </param>
/// <returns>
/// The response body, or <see cref="string.Empty"/> when the response status is not a
/// success code.
/// </returns>
/// <remarks>
/// Side effects: the response is stored in <c>_httpRespMsg</c>; on a non-success status
/// <c>CurrentStatus.LastOpMessage</c> and <c>CurrentStatus.ScrapeComplete</c> are updated;
/// on success the body length is added to <c>BytesReceived</c>.
/// </remarks>
private static async Task<string> PostIt(Uri uri, FormSearchSos formSearchSos)
{
    var formContent = new FormUrlEncodedContent(formSearchSos.FormDataList());
    var request = new HttpRequestMessage
    {
        RequestUri = uri,
        Method = HttpMethod.Post,
        Content = formContent
    };
    SetRequestHeaders(request);

    // The callers in this file block on the returned task with .Result. Resuming on the
    // captured synchronization context would deadlock under a single-threaded context,
    // so the continuations are explicitly allowed to run on any thread.
    _httpRespMsg = await NetHttpClient.Client.SendAsync(request).ConfigureAwait(false);

    if (!_httpRespMsg.IsSuccessStatusCode)
    {
        CurrentStatus.LastOpMessage =
            $"PostIt could not retrieve URL, StatusCode: {_httpRespMsg.StatusCode}";
        CurrentStatus.ScrapeComplete = true;
        return string.Empty;
    }

    var stringContent = await _httpRespMsg.Content.ReadAsStringAsync().ConfigureAwait(false);

    // NOTE(review): Length is a count of UTF-16 characters, not of bytes on the wire;
    // kept as-is because other code may rely on the current meaning of BytesReceived.
    BytesReceived += stringContent.Length;
    return stringContent;
}
/// <summary>
/// Posts the search form to <c>OfficeSearchResultsUrl</c> and builds the list of elections
/// from the <c>option</c> elements of the <c>id_election</c> select in the returned HTML.
/// </summary>
/// <param name="formSearchSos">Form data sent with the request.</param>
/// <returns>
/// The shared <c>CurrentStatus</c> object. <c>ScrapeComplete</c> is set on every path.
/// On success <c>ScrapeSuccess</c> is true and <c>Elections</c> holds one entry per option;
/// otherwise <c>LastOpMessage</c> describes what went wrong.
/// </returns>
public static ScrapeStatus GetElections(FormSearchSos formSearchSos)
{
    string contentString;
    try
    {
        // The method stays synchronous for its callers, so the task is blocked on here.
        contentString = PostIt(new Uri(OfficeSearchResultsUrl), formSearchSos).Result;
    }
    catch (AggregateException ex) when (ex.InnerException is HttpRequestException
                                        || ex.InnerException is OperationCanceledException)
    {
        // Transport-level failures (connection errors, timeouts) arrive wrapped by
        // Task.Result. Report them through the status object, mirroring the
        // HTTP-error path below, instead of letting them escape.
        CurrentStatus.LastOpMessage = $"GetElections network failure: {ex.InnerException.Message}";
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.ScrapeSuccess = false;
        CurrentStatus.LastPageCompleted++;
        return CurrentStatus;
    }

    if (!_httpRespMsg.IsSuccessStatusCode)
    {
        CurrentStatus.LastOpMessage =
            $"GetElections call returned Status Code: {_httpRespMsg.StatusCode}";
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.LastPageCompleted++;
        return CurrentStatus;
    }

    if (string.IsNullOrEmpty(contentString))
    {
        CurrentStatus.LastOpMessage = "GetElections received null content";
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.LastPageCompleted++;
        return CurrentStatus;
    }

    CurrentStatus.LastOpMessage = $"GetElections received document length = {contentString.Length}";

    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(contentString);

    const string tgtSelect = "//select[@id='id_election']/option";
    var electNodes = htmlDoc.DocumentNode.SelectNodes(tgtSelect);
    if (electNodes == null)
    {
        // SelectNodes yields null, not an empty collection, when nothing matches.
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.ScrapeSuccess = false;
        CurrentStatus.LastOpMessage = "ElectNodes search returned null.";
        return CurrentStatus;
    }

    // The first attribute of each option is used as the election's value; an option
    // without any attribute would make the indexer throw, so it gets an empty value.
    var elections = electNodes
        .Select(node => new Election(
            node.Attributes.Count > 0 ? node.Attributes[0].Value : string.Empty,
            node.InnerText))
        .ToList();

    CurrentStatus.ScrapeComplete = true;
    CurrentStatus.ScrapeSuccess = true;
    CurrentStatus.Elections = elections;
    return CurrentStatus;
}
/// <summary>
/// Posts the search form to <c>OfficeSearchResultsUrl</c> and builds the candidate list
/// from the table rows found under the element with class <c>col1Inner</c>.
/// </summary>
/// <param name="formSearchSos">
/// Form data sent with the request; its <c>ElectionYear.VarValue</c> is passed on to
/// <c>FillCandidate</c> for every candidate.
/// </param>
/// <returns>
/// The shared <c>CurrentStatus</c> object. <c>ScrapeComplete</c> is set on every path.
/// On success <c>ScrapeSuccess</c> is true and <c>Candidates</c> holds the parsed list;
/// otherwise <c>LastOpMessage</c> describes what went wrong.
/// </returns>
public static ScrapeStatus GetCandidates(FormSearchSos formSearchSos)
{
    string contentString;
    try
    {
        // The method stays synchronous for its callers, so the task is blocked on here.
        contentString = PostIt(new Uri(OfficeSearchResultsUrl), formSearchSos).Result;
    }
    catch (AggregateException ex) when (ex.InnerException is HttpRequestException
                                        || ex.InnerException is OperationCanceledException)
    {
        // Transport-level failures (connection errors, timeouts) arrive wrapped by
        // Task.Result. Report them through the status object, mirroring the
        // HTTP-error path below, instead of letting them escape.
        CurrentStatus.LastOpMessage = $"GetCandidates network failure: {ex.InnerException.Message}";
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.ScrapeSuccess = false;
        CurrentStatus.LastPageCompleted++;
        return CurrentStatus;
    }

    if (!_httpRespMsg.IsSuccessStatusCode)
    {
        CurrentStatus.LastOpMessage =
            $"GetCandidates call returned Status Code: {_httpRespMsg.StatusCode}";
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.LastPageCompleted++;
        return CurrentStatus;
    }

    if (string.IsNullOrEmpty(contentString))
    {
        CurrentStatus.LastOpMessage = "GetCandidates received null content";
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.LastPageCompleted++;
        return CurrentStatus;
    }

    CurrentStatus.LastOpMessage = $"GetCandidates received document length = {contentString.Length}";

    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(contentString);

    var year = formSearchSos.ElectionYear;
    var candList = new List<CandidateSos>();

    const string tgtDiv = "//*[@class=\"col1Inner\"]/table/tr";
    var nodes = htmlDoc.DocumentNode.SelectNodes(tgtDiv);
    if (nodes == null)
    {
        // SelectNodes yields null, not an empty collection, when nothing matches.
        CurrentStatus.ScrapeComplete = true;
        CurrentStatus.ScrapeSuccess = false;
        CurrentStatus.LastOpMessage = "Data table search returned null.";
        return CurrentStatus;
    }

    var candDesc = string.Empty;
    foreach (var nodetr in nodes)
    {
        if (nodetr.ChildNodes.Count == 0)
        {
            // An empty <tr> has no first child to inspect and carries no data.
            continue;
        }

        // The first child may be a #text node, which has no attributes and therefore
        // falls through to the candidate branch.
        var tdObj = nodetr.ChildNodes[0];
        if (tdObj.Attributes.Count > 0 && tdObj.Attributes[0].Name == "colspan")
        {
            // A colspan cell opens a new candidate section; remember its heading.
            candDesc = CleanUpWhiteSpace(tdObj.InnerText);
            continue;
        }

        if (nodetr.InnerHtml.Contains("Qualified - Signatures Required"))
        {
            // The asterisk footnote marks the end of the candidate list.
            break;
        }

        var cells = CollectCandidateCells(nodetr.InnerHtml);
        if (cells.Count == 0)
        {
            // No nested table cells in this row: nothing to build a candidate from.
            continue;
        }

        candList.Add(FillCandidate(cells, candDesc, year.VarValue));
    }

    CurrentStatus.ScrapeComplete = true;
    CurrentStatus.ScrapeSuccess = true;
    CurrentStatus.Candidates = candList;
    return CurrentStatus;
}

/// <summary>
/// Parses an HTML fragment and returns the text of every <c>td</c> in every <c>tr</c> of
/// every <c>table</c> it contains, numbered consecutively from 1 in document order.
/// Returns an empty list when the fragment holds no such cells.
/// </summary>
private static List<CellData> CollectCandidateCells(string rowHtml)
{
    var subdoc = new HtmlDocument();
    subdoc.LoadHtml(rowHtml);

    var cells = new List<CellData>();
    var tables = subdoc.DocumentNode.SelectNodes("//table");
    if (tables == null)
    {
        return cells;
    }

    var cellNumber = 1;
    foreach (var table in tables)
    {
        var rows = table.SelectNodes("tr");
        if (rows == null)
        {
            continue;
        }

        foreach (var row in rows)
        {
            var tds = row.SelectNodes("td");
            if (tds == null)
            {
                continue;
            }

            foreach (var cell in tds)
            {
                cells.Add(new CellData
                {
                    RowNum = cellNumber++,
                    CellText = CleanUpWhiteSpace(cell.InnerText)
                });
            }
        }
    }

    return cells;
}