/// <summary>
/// Runs one candidate search: reads the first results page and, when there are
/// more pages, keeps reading until every page has been completed.
/// </summary>
/// <param name="search">The search form to run; it is also stored in <c>SeqStatus.TheFormSearch</c>.</param>
/// <returns>
/// <c>false</c> only when the first page read fails with <c>TotalPages == -2</c>
/// (internet connection problem), in which case <c>SeqStatus.SequenceFail</c> is set.
/// <c>true</c> in every other case, including "no candidates" and
/// "could not retrieve URL"; the reason is recorded in <c>SeqStatus.LastOpMessage</c>.
/// </returns>
private bool RunQuery(FormSearch search)
{
    SeqStatus.TheFormSearch = search;

    if (!UpdateCandidates.ReadFirstPage(search))
    {
        // ReadFirstPage said not to continue; TotalPages tells us why.
        var totalPages = UpdateCandidates.CurrentStatus.TotalPages;
        var office = $"{search.OfficeName}, officeTypeId: {search.OfficeTypeId}";

        switch (totalPages)
        {
            case -2:
                // Problem with internet connection: abort the whole sequence.
                SeqStatus.LastOpMessage = $"RunQuery: Fail in first page search for {office}: {UpdateCandidates.CurrentStatus.LastOpMessage}";
                SeqStatus.SequenceFail = true;
                return false;

            case -1:
                // Problem with search: could not retrieve URL, null content.
                SeqStatus.LastOpMessage = $"RunQuery: Fail in first page search for {office}: {UpdateCandidates.CurrentStatus.LastOpMessage}";
                break;

            case 0:
                // No candidates found in category.
                SeqStatus.LastOpMessage = $"RunQuery: No candidates found for {office}.";
                break;

            case 1:
                // Only one page of results.
                SeqStatus.LastOpMessage = $"RunQuery: Found {UpdateCandidates.Candidates.Count} for {office}.";
                break;

            default:
                SeqStatus.LastOpMessage = $"RunQuery: ReadFirstPage said don't continue for {office}, PageCount: {totalPages}: Should never get here!";
                break;
        }

        // Honour "don't continue": never fall through into the paging loop after
        // ReadFirstPage reported there is nothing further to read.
        return true;
    }

    // NOTE(review): the result of ReadSubsequentPage is ignored, so this loop only
    // ends once LastPageCompleted reaches TotalPages. Confirm that
    // ReadSubsequentPage always advances or finalises those counters on failure.
    while (UpdateCandidates.CurrentStatus.LastPageCompleted < UpdateCandidates.CurrentStatus.TotalPages)
    {
        UpdateCandidates.ReadSubsequentPage(search);
    }

    return true;
}
/// <summary>
/// Downloads the candidate's additional-information page and copies the office
/// name, status and affiliation found there onto <paramref name="candidate"/>.
/// Progress and failure reasons are reported through <c>AddInfoStatus</c>.
/// </summary>
/// <param name="candidate">
/// Candidate whose <c>InfoUrl</c> is fetched. Its <c>Status</c>, <c>Affiliation</c>
/// and <c>Notes</c> are updated directly, and it is passed to <c>ParseOfficeName</c>.
/// </param>
/// <returns>
/// <c>true</c> when the page was read and parsed; <c>false</c> when the URL is
/// invalid, the request failed, the content was empty, the site reported no data,
/// or the expected tables were not found. On <c>false</c>,
/// <c>AddInfoStatus.LastOpMessage</c> holds the reason. Both outcomes set
/// <c>AddInfoStatus.ScrapeComplete</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="candidate"/> is null.</exception>
public static bool ReadThePage(Candidate candidate)
{
    if (candidate == null)
    {
        throw new ArgumentNullException(nameof(candidate));
    }

    ResetStatus(true, true);
    AddInfoStatus.Url = candidate.InfoUrl;

    // A missing or malformed URL is reported like every other failure instead of throwing.
    Uri infoUri;
    if (!Uri.TryCreate(candidate.InfoUrl, UriKind.Absolute, out infoUri))
    {
        return FailReadThePage($"ReadThePage received an invalid InfoUrl: '{candidate.InfoUrl}'");
    }
    AddInfoStatus.TheUri = infoUri;

    // NOTE(review): blocking on .Result is kept because this method is synchronous;
    // exceptions thrown by the request itself still propagate to the caller.
    string contentString;
    using (var httpRespMsg = UpdateCandidates.GetSearchPage(AddInfoStatus.TheUri, HttpMethod.Get).Result)
    {
        if (!httpRespMsg.IsSuccessStatusCode)
        {
            return FailReadThePage($"ReadThePage could not retrieve URL, StatusCode: {httpRespMsg.StatusCode}");
        }

        contentString = httpRespMsg.Content.ReadAsStringAsync().Result;
    }

    // Check for empty content before touching Length.
    if (string.IsNullOrEmpty(contentString))
    {
        return FailReadThePage("ReadThePage received null content");
    }
    UpdateCandidates.BytesReceived += contentString.Length;

    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(contentString);

    // Check for "CAMPAIGN REPORTS - NAME SEARCH" - indicates no results.
    const string tgtNoResults = "/html/body/form/table/tr[2]/td/table/tr/td[2]/div/h3";
    var noResultsNodes = htmlDoc.DocumentNode.SelectNodes(tgtNoResults);
    if (noResultsNodes != null
        && noResultsNodes[0].InnerHtml.Contains("CAMPAIGN REPORTS - NAME SEARCH"))
    {
        AddInfoStatus.TotalPages = 0;
        AddInfoStatus.TotalCandidates = 0;
        return FailReadThePage("Additional Info Search Returned No Data.");
    }

    // Get candidate data from the table.
    const string tgtTable = "/html/body/form/table/tr[2]/td/table/tr/td[2]/div/table/tr/td/div[2]/table[2]/tr/td/div/table/tr[2]";
    var nodes = htmlDoc.DocumentNode.SelectNodes(tgtTable);
    if (nodes == null)
    {
        return FailReadThePage($"Additional Data table search returned null, FilerId: {candidate.FilerId}, Candidate Name: {candidate.CandidateName}");
    }

    // Child nodes 1, 2 and 4 are read below, so the row needs at least five of them.
    var row = nodes[0];
    if (row.ChildNodes.Count < 5)
    {
        return FailReadThePage($"Additional Data table row has {row.ChildNodes.Count} child nodes, expected at least 5, FilerId: {candidate.FilerId}, Candidate Name: {candidate.CandidateName}");
    }

    var filerId = row.ChildNodes[1].InnerText.Trim();
    if (filerId != candidate.FilerId)
    {
        // Just make a note of it and continue.
        AddInfoStatus.LastOpMessage = $"FilerId mismatch, first row: {filerId}, input: {candidate.FilerId}.";
        candidate.Notes += $"Different FilerId on info page: {filerId}.";
    }

    var officeText = row.ChildNodes[2].InnerText.Trim();
    ParseOfficeName(officeText, candidate);
    candidate.Status = row.ChildNodes[4].InnerText.Trim();

    // Now get affiliation from the Registration Information tab.
    const string tgtCampaignRegInfo = "/html/body/form/table/tr[2]/td/table/tr/td[2]/div/table/tr/td/div[2]/div/div/div[2]/div[2]/div/div/table/tr/td/div/table";
    var campaignRegInfo = htmlDoc.DocumentNode.SelectNodes(tgtCampaignRegInfo);
    if (campaignRegInfo == null)
    {
        return FailReadThePage("Campaign Registration Info Tab search returned null.");
    }

    // The affiliation is read from the sixth matched table; guard against a shorter match list.
    if (campaignRegInfo.Count < 6 || campaignRegInfo[5].ChildNodes.Count == 0)
    {
        return FailReadThePage($"Campaign Registration Info Tab has an unexpected layout, tables found: {campaignRegInfo.Count}.");
    }
    candidate.Affiliation = campaignRegInfo[5].ChildNodes[0].InnerText;

    AddInfoStatus.ScrapeComplete = true;
    AddInfoStatus.LastPageCompleted = 1;
    return true;
}

/// <summary>
/// Records <paramref name="message"/> as the last operation message, marks the
/// scrape as complete and returns <c>false</c> so callers can write
/// <c>return FailReadThePage(...)</c>.
/// </summary>
private static bool FailReadThePage(string message)
{
    AddInfoStatus.LastOpMessage = message;
    AddInfoStatus.ScrapeComplete = true;
    return false;
}