예제 #1
0
        /// <summary>
        /// Sends the form data from <paramref name="formSearchSos"/> as a URL-encoded POST to
        /// <paramref name="uri"/> and returns the response body as text.
        /// </summary>
        /// <remarks>
        /// The response message is stored in the shared <c>_httpRespMsg</c> field so that callers can
        /// inspect its status after this method returns; for that reason it is not disposed here.
        /// Both awaits use <c>ConfigureAwait(false)</c>: callers in this file block on the returned task
        /// with <c>.Result</c>, which would deadlock if the continuation had to resume on a captured
        /// synchronization context. As a consequence, everything after the first await may run on a
        /// thread-pool thread.
        /// </remarks>
        /// <param name="uri">Address the form is posted to.</param>
        /// <param name="formSearchSos">Supplies the form fields through <c>FormDataList()</c>.</param>
        /// <returns>
        /// The response body, or <see cref="string.Empty"/> when the response status is not a success code
        /// (in which case <c>CurrentStatus</c> is marked complete with an explanatory message).
        /// </returns>
        private static async Task <string> PostIt(Uri uri, FormSearchSos formSearchSos)
        {
            var formContent = new FormUrlEncodedContent(formSearchSos.FormDataList());

            // Disposing the request also disposes its content; the response is kept alive separately.
            using (var request = new HttpRequestMessage {
                RequestUri = uri, Method = HttpMethod.Post, Content = formContent
            })
            {
                SetRequestHeaders(request);

                _httpRespMsg = await NetHttpClient.Client.SendAsync(request).ConfigureAwait(false);
            }

            if (!_httpRespMsg.IsSuccessStatusCode)
            {
                CurrentStatus.LastOpMessage  = $"PostIt could not retrieve URL, StatusCode: {_httpRespMsg.StatusCode}";
                CurrentStatus.ScrapeComplete = true;
                return(string.Empty);
            }

            var stringContent = await _httpRespMsg.Content.ReadAsStringAsync().ConfigureAwait(false);

            // NOTE(review): this adds the decoded character count, not the number of bytes on the wire.
            BytesReceived += stringContent.Length;

            return(stringContent);
        }
예제 #2
0
        /// <summary>
        /// Posts the search form to <c>OfficeSearchResultsUrl</c> and extracts the list of elections from
        /// the <c>&lt;option&gt;</c> elements of the <c>id_election</c> select box in the returned HTML.
        /// </summary>
        /// <param name="formSearchSos">Form values to post.</param>
        /// <returns>
        /// The shared <c>CurrentStatus</c> object. On success <c>ScrapeSuccess</c> is true and
        /// <c>Elections</c> holds the parsed entries; on any failure <c>ScrapeSuccess</c> is false and
        /// <c>LastOpMessage</c> describes what went wrong. <c>ScrapeComplete</c> is true in every case.
        /// </returns>
        public static ScrapeStatus GetElections(FormSearchSos formSearchSos)
        {
            // TODO: put exception catcher here for network problems.
            // NOTE(review): .Result blocks the calling thread and surfaces failures as AggregateException.
            var contentString = PostIt(new Uri(OfficeSearchResultsUrl), formSearchSos).Result;

            if (!_httpRespMsg.IsSuccessStatusCode)
            {
                CurrentStatus.LastOpMessage  = $"GetElections call returned Status Code: {_httpRespMsg.StatusCode}";
                CurrentStatus.ScrapeComplete = true;
                CurrentStatus.ScrapeSuccess  = false; // CurrentStatus is shared; do not leave a stale success flag
                CurrentStatus.LastPageCompleted++;
                return(CurrentStatus);
            }

            if (string.IsNullOrEmpty(contentString))
            {
                CurrentStatus.LastOpMessage  = "GetElections received null content";
                CurrentStatus.ScrapeComplete = true;
                CurrentStatus.ScrapeSuccess  = false;
                CurrentStatus.LastPageCompleted++;
                return(CurrentStatus);
            }

            CurrentStatus.LastOpMessage = "GetElections received document length = " + contentString.Length;

            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(contentString);

            const string tgtSelect = "//select[@id='id_election']/option";

            // SelectNodes yields null (not an empty collection) when nothing matches.
            var electNodes = htmlDoc.DocumentNode.SelectNodes(tgtSelect);

            if (electNodes == null)
            {
                CurrentStatus.ScrapeComplete = true;
                CurrentStatus.ScrapeSuccess  = false;
                CurrentStatus.LastOpMessage  = "ElectNodes search returned null.";
                return(CurrentStatus);
            }

            // Look the "value" attribute up by name: it is not guaranteed to be the first attribute
            // (e.g. <option selected value="...">), and an option may carry no attributes at all.
            var elections = electNodes
                            .Select(node => new Election(node.GetAttributeValue("value", string.Empty), node.InnerText))
                            .ToList();

            CurrentStatus.ScrapeComplete = true;
            CurrentStatus.ScrapeSuccess  = true;
            CurrentStatus.Elections      = elections;

            return(CurrentStatus);
        }
예제 #3
0
        /// <summary>
        /// Posts the search form to <c>OfficeSearchResultsUrl</c> and parses the result table
        /// (<c>//*[@class="col1Inner"]/table/tr</c>) into a list of candidates.
        /// </summary>
        /// <remarks>
        /// Rows whose first child node has <c>colspan</c> as its first attribute start a new section; their
        /// cleaned text is passed as the description for every candidate row that follows. Parsing stops at
        /// the first row containing the text "Qualified - Signatures Required". Every other row is expected
        /// to contain a nested table whose cells are handed to <c>FillCandidate</c>; rows without any such
        /// cells are skipped.
        /// </remarks>
        /// <param name="formSearchSos">Form values to post; its <c>ElectionYear</c> is passed on to each candidate.</param>
        /// <returns>
        /// The shared <c>CurrentStatus</c> object. On success <c>ScrapeSuccess</c> is true and
        /// <c>Candidates</c> holds the parsed entries; on any failure <c>ScrapeSuccess</c> is false and
        /// <c>LastOpMessage</c> describes what went wrong. <c>ScrapeComplete</c> is true in every case.
        /// </returns>
        public static ScrapeStatus GetCandidates(FormSearchSos formSearchSos)
        {
            // NOTE(review): .Result blocks the calling thread and surfaces failures as AggregateException.
            var contentString = PostIt(new Uri(OfficeSearchResultsUrl), formSearchSos).Result;

            if (!_httpRespMsg.IsSuccessStatusCode)
            {
                CurrentStatus.LastOpMessage  = $"GetCandidates call returned Status Code: {_httpRespMsg.StatusCode}";
                CurrentStatus.ScrapeComplete = true;
                CurrentStatus.ScrapeSuccess  = false; // CurrentStatus is shared; do not leave a stale success flag
                CurrentStatus.LastPageCompleted++;
                return(CurrentStatus);
            }

            if (string.IsNullOrEmpty(contentString))
            {
                CurrentStatus.LastOpMessage  = "GetCandidates received null content";
                CurrentStatus.ScrapeComplete = true;
                CurrentStatus.ScrapeSuccess  = false;
                CurrentStatus.LastPageCompleted++;
                return(CurrentStatus);
            }

            CurrentStatus.LastOpMessage = "GetCandidates received document length = " + contentString.Length;

            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(contentString);
            var year = formSearchSos.ElectionYear;

            var candList = new List <CandidateSos>();

            const string tgtDiv = "//*[@class=\"col1Inner\"]/table/tr";
            var          nodes  = htmlDoc.DocumentNode.SelectNodes(tgtDiv);

            if (nodes == null)
            {
                CurrentStatus.ScrapeComplete = true;
                CurrentStatus.ScrapeSuccess  = false;
                CurrentStatus.LastOpMessage  = "Data table search returned null.";
                return(CurrentStatus);
            }

            var candDesc = string.Empty;

            foreach (var nodetr in nodes)
            {
                // An empty <tr> has no child nodes; indexing ChildNodes[0] would throw.
                var tdObj = nodetr.ChildNodes.Count > 0 ? nodetr.ChildNodes[0] : null;

                // NOTE(review): the first child may be a #text node rather than the <td>; such rows are
                // treated as candidate rows, exactly as before.
                if (tdObj != null && tdObj.Attributes.Count > 0 && tdObj.Attributes[0].Name == "colspan")
                {
                    // Start of candidate section
                    candDesc = CleanUpWhiteSpace(tdObj.InnerText);
                    continue;
                }

                if (nodetr.InnerHtml.Contains("Qualified - Signatures Required"))
                {
                    // We're at the end of the list, asterisk
                    break;
                }

                var subdoc = new HtmlDocument();
                subdoc.LoadHtml(nodetr.InnerHtml);

                // Flatten every cell of every nested table row into one list. RowNum is a running
                // 1-based counter over cells (it advances per cell, not per row).
                // SelectNodes returns null when nothing matches, so each level is guarded; enumerating
                // the null result directly is what made the previous query throw on some pages.
                var rowcntr = 1;
                var cells   = new List <CellData>();
                var tables  = subdoc.DocumentNode.SelectNodes("//table");

                if (tables != null)
                {
                    foreach (var table in tables)
                    {
                        var rows = table.SelectNodes("tr");

                        if (rows == null)
                        {
                            continue;
                        }

                        foreach (var row in rows)
                        {
                            var tds = row.SelectNodes("td");

                            if (tds == null)
                            {
                                continue;
                            }

                            foreach (var cell in tds)
                            {
                                cells.Add(new CellData {
                                    RowNum = rowcntr++, CellText = CleanUpWhiteSpace(cell.InnerText)
                                });
                            }
                        }
                    }
                }

                if (cells.Count == 0)
                {
                    // Not a candidate row (no nested table cells); nothing to fill.
                    continue;
                }

                candList.Add(FillCandidate(cells, candDesc, year.VarValue));
            }

            CurrentStatus.ScrapeComplete = true;
            CurrentStatus.ScrapeSuccess  = true;
            CurrentStatus.Candidates     = candList;
            return(CurrentStatus);
        }