示例#1
0
        /// <summary>
        /// Runs one candidate search: reads the first results page and then keeps reading
        /// subsequent pages until <c>LastPageCompleted</c> reaches <c>TotalPages</c>.
        /// </summary>
        /// <param name="search">The search to run; it is also stored in <c>SeqStatus.TheFormSearch</c>.</param>
        /// <returns>
        /// <c>false</c> only when the first-page read fails with a <c>TotalPages</c> code of -2
        /// (in which case <c>SeqStatus.SequenceFail</c> is also set); <c>true</c> in every other case.
        /// </returns>
        private bool RunQuery(FormSearch search)
        {
            SeqStatus.TheFormSearch = search;

            if (!UpdateCandidates.ReadFirstPage(search))
            {
                // ReadFirstPage said not to continue; TotalPages doubles as the reason code.
                var status = UpdateCandidates.CurrentStatus;

                switch (status.TotalPages)
                {
                case -2:
                    // Problem with internet connection: the only outcome that fails the whole sequence.
                    SeqStatus.LastOpMessage =
                        $"RunQuery: Fail in first page search for {search.OfficeName}, officeTypeId: {search.OfficeTypeId}: {status.LastOpMessage}";
                    SeqStatus.SequenceFail = true;
                    return false;

                case -1:
                    // Problem with search: could not retrieve URL, or null content.
                    SeqStatus.LastOpMessage =
                        $"RunQuery: Fail in first page search for {search.OfficeName}, officeTypeId: {search.OfficeTypeId}: {status.LastOpMessage}";
                    break;

                case 0:
                    // No candidates found in category.
                    SeqStatus.LastOpMessage =
                        $"RunQuery: No candidates found for {search.OfficeName}, officeTypeId: {search.OfficeTypeId}.";
                    break;

                case 1:
                    // Only one page of results, so there is nothing more to read.
                    SeqStatus.LastOpMessage =
                        $"RunQuery: Found {UpdateCandidates.Candidates.Count} for {search.OfficeName}, officeTypeId: {search.OfficeTypeId}.";
                    break;

                default:
                    SeqStatus.LastOpMessage =
                        $"RunQuery: ReadFirstPage said don't continue for {search.OfficeName}, officeTypeId: {search.OfficeTypeId}, PageCount: {status.TotalPages}: Should never get here!";
                    break;
                }

                // Honor "don't continue": stop here instead of falling through to the page loop,
                // which could otherwise start reading further pages after an unexpected status.
                return true;
            }

            // NOTE(review): ReadSubsequentPage's return value is not inspected; the loop ends only
            // when LastPageCompleted reaches TotalPages - confirm every call makes progress.
            while (UpdateCandidates.CurrentStatus.LastPageCompleted < UpdateCandidates.CurrentStatus.TotalPages)
            {
                UpdateCandidates.ReadSubsequentPage(search);
            }

            return true;
        }
示例#2
0
        /// <summary>
        /// Downloads the candidate's additional-info page and copies the office name, status and
        /// affiliation found on it onto <paramref name="candidate"/>. The outcome of every step is
        /// recorded in <c>AddInfoStatus</c>.
        /// </summary>
        /// <param name="candidate">
        /// Candidate whose <c>InfoUrl</c> is fetched. It is updated in place; a FilerId mismatch
        /// is appended to its <c>Notes</c> rather than treated as an error.
        /// </param>
        /// <returns>
        /// <c>true</c> when the page was retrieved and all expected nodes were found;
        /// <c>false</c> otherwise, with the reason in <c>AddInfoStatus.LastOpMessage</c>.
        /// </returns>
        public static bool ReadThePage(Candidate candidate)
        {
            // Positions of the values within the scraped nodes (fixed by the page layout).
            const int filerIdCellIndex      = 1;
            const int officeCellIndex       = 2;
            const int statusCellIndex       = 4;
            const int affiliationTableIndex = 5;

            ResetStatus(true, true);
            AddInfoStatus.Url    = candidate.InfoUrl;
            AddInfoStatus.TheUri = new Uri(candidate.InfoUrl);

            string contentString;

            // Dispose the response as soon as its body has been read so the connection is released.
            using (var httpRespMsg = UpdateCandidates.GetSearchPage(AddInfoStatus.TheUri, HttpMethod.Get).Result)
            {
                if (!httpRespMsg.IsSuccessStatusCode)
                {
                    AddInfoStatus.LastOpMessage  = $"ReadThePage could not retrieve URL, StatusCode: {httpRespMsg.StatusCode}";
                    AddInfoStatus.ScrapeComplete = true;
                    return false;
                }

                contentString = httpRespMsg.Content.ReadAsStringAsync().Result;
            }

            if (string.IsNullOrEmpty(contentString))
            {
                AddInfoStatus.LastOpMessage  = "ReadThePage received null content";
                AddInfoStatus.ScrapeComplete = true;
                return false;
            }

            // Counted only after the empty check so a null body cannot be dereferenced.
            UpdateCandidates.BytesReceived += contentString.Length;

            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(contentString);

            // Check for "CAMPAIGN REPORTS - NAME SEARCH" - indicates no results.
            const string tgtNoResults = "/html/body/form/table/tr[2]/td/table/tr/td[2]/div/h3";

            var noResultsNodes = htmlDoc.DocumentNode.SelectNodes(tgtNoResults);

            if (noResultsNodes != null &&
                noResultsNodes[0].InnerHtml.Contains("CAMPAIGN REPORTS - NAME SEARCH"))
            {
                AddInfoStatus.LastOpMessage   = "Additional Info Search Returned No Data.";
                AddInfoStatus.TotalPages      = 0;
                AddInfoStatus.TotalCandidates = 0;
                AddInfoStatus.ScrapeComplete  = true;
                return false;
            }

            // Get candidate data from the table.
            const string tgtTable =
                "/html/body/form/table/tr[2]/td/table/tr/td[2]/div/table/tr/td/div[2]/table[2]/tr/td/div/table/tr[2]";

            var nodes = htmlDoc.DocumentNode.SelectNodes(tgtTable);

            if (nodes == null)
            {
                AddInfoStatus.ScrapeComplete = true;
                AddInfoStatus.LastOpMessage  = $"Additional Data table search returned null, FilerId: {candidate.FilerId}, Candidate Name: {candidate.CandidateName}";
                return false;
            }

            var cells = nodes[0].ChildNodes;

            // A row with fewer child nodes than expected means the layout differs; report it
            // instead of letting the fixed indexes below throw ArgumentOutOfRangeException.
            if (cells.Count <= statusCellIndex)
            {
                AddInfoStatus.ScrapeComplete = true;
                AddInfoStatus.LastOpMessage  = $"Additional Data table row has {cells.Count} child nodes, expected at least {statusCellIndex + 1}, FilerId: {candidate.FilerId}, Candidate Name: {candidate.CandidateName}";
                return false;
            }

            var filerId = cells[filerIdCellIndex].InnerText.Trim();

            if (filerId != candidate.FilerId)
            {
                // Just make a note of it and continue.
                AddInfoStatus.LastOpMessage = $"FilerId mismatch, first row: {filerId}, input: {candidate.FilerId}.";
                candidate.Notes            += $"Different FilerId on info page: {filerId}.";
            }

            var txt = cells[officeCellIndex].InnerText.Trim();

            ParseOfficeName(txt, candidate);
            candidate.Status = cells[statusCellIndex].InnerText.Trim();

            // Now get affiliation from the Registration Information tab.
            const string tgtCampaignRegInfo =
                "/html/body/form/table/tr[2]/td/table/tr/td[2]/div/table/tr/td/div[2]/div/div/div[2]/div[2]/div/div/table/tr/td/div/table";

            var campaignRegInfo = htmlDoc.DocumentNode.SelectNodes(tgtCampaignRegInfo);

            if (campaignRegInfo == null)
            {
                AddInfoStatus.ScrapeComplete = true;
                AddInfoStatus.LastOpMessage  = "Campaign Registration Info Tab search returned null.";
                return false;
            }

            // Same guard for the affiliation lookup: the table at the expected index, and its
            // first child node, must both exist.
            if (campaignRegInfo.Count <= affiliationTableIndex ||
                campaignRegInfo[affiliationTableIndex].ChildNodes.Count == 0)
            {
                AddInfoStatus.ScrapeComplete = true;
                AddInfoStatus.LastOpMessage  = $"Campaign Registration Info Tab returned {campaignRegInfo.Count} tables, affiliation table not found.";
                return false;
            }

            candidate.Affiliation = campaignRegInfo[affiliationTableIndex].ChildNodes[0].InnerText;

            AddInfoStatus.ScrapeComplete    = true;
            AddInfoStatus.LastPageCompleted = 1;

            return true;
        }