예제 #1
0
        /// <summary>
        /// Scrapes the term junction page at <c>this.URL</c>. Every anchor with an href inside the
        /// main content div is recorded in <c>mainLinks</c> (anchor text mapped to href). When a
        /// "Stenoprotokoly" entry is present, that page is fetched as well and one
        /// <c>pspMeetingProtocol</c> is added to <c>MeetingProtocols</c> for each anchor that
        /// contains a bold element.
        /// </summary>
        /// <remarks>
        /// Exceptions raised while fetching or parsing are not handled here; they propagate to
        /// the caller unchanged.
        /// </remarks>
        public void ScrapeTermJunction()
        {
            var mainContent = Scraper.GetMainContentDivOnURL(this.URL.ToString());

            // SelectNodes can return null instead of an empty collection when nothing matches
            // (HtmlAgilityPack behaviour; assumes mainContent is an HtmlNode), so guard the loop.
            var links = mainContent.SelectNodes(".//a[@href]");
            if (links != null)
            {
                foreach (var link in links)
                {
                    // NOTE(review): if mainLinks is a Dictionary, two anchors with identical
                    // text make Add throw - confirm against the field's declared type.
                    mainLinks.Add(link.InnerText, link.GetAttributeValue("href", ""));
                }
            }

            // NOTE(review): if mainLinks is a Dictionary<string, string>, this indexer throws
            // KeyNotFoundException for a missing key rather than returning null - verify.
            if (mainLinks["Stenoprotokoly"] == null)
            {
                return;
            }

            meetingsListStenoprotocolLinks = Scraper.pspHostURL + mainLinks["Stenoprotokoly"];

            // This should fetch, for example, http://www.psp.cz/eknih/2010ps/stenprot/index.htm
            var stenoMainContent = Scraper.GetMainContentDivOnURL(meetingsListStenoprotocolLinks);

            // Select the <b> children of anchors; a null result means the index lists no meetings.
            var stenoLinks = stenoMainContent.SelectNodes(".//a[@href]/b");
            if (stenoLinks == null)
            {
                return;
            }

            foreach (var boldNode in stenoLinks)
            {
                // The href lives on the enclosing <a>, not on the <b> that was selected.
                var href = boldNode.ParentNode.GetAttributeValue("href", "");
                MeetingProtocols.Add(new pspMeetingProtocol(Scraper.pspHostURL + href));
            }
        }
예제 #2
0
        /// <summary>
        /// Loads the voting page at <paramref name="URL"/> and fills this instance from it:
        /// <c>meetingNumber</c>, <c>votingNumber</c>, <c>subject</c> and <c>when</c> are parsed
        /// from the page's first <c>h1</c> heading, and every <c>li</c> element accepted by
        /// <c>isLINodeaVote</c> is turned into an <c>individualVote</c> and passed to
        /// <c>AddIndividualVote</c>. The heading and the resulting vote count are written to the
        /// console.
        /// </summary>
        /// <param name="URL">Address of the voting page to load.</param>
        /// <exception cref="FormatException">
        /// The page has no <c>main-content</c> div or no <c>h1</c> heading, or the heading does
        /// not contain exactly six numbers.
        /// </exception>
        public pspVoting(string URL)
        {
            var webLoader = Scraper.WebGetFactory();
            var document  = webLoader.Load(URL);

            // NOTE(review): this writes to the parameter, which is not read again below. It looks
            // like the post-redirect address was meant to be stored on the instance - confirm
            // which member should receive it.
            URL = webLoader.ResponseUri.ToString();

            var mainContent = document.DocumentNode.SelectSingleNode("//div[@id = 'main-content']");
            if (mainContent == null)
            {
                throw new FormatException("Voting page has no 'main-content' div: " + URL);
            }

            // SelectNodes can return null rather than an empty collection when nothing matches
            // (HtmlAgilityPack behaviour).
            var h1 = mainContent.SelectNodes(".//h1");
            if (h1 == null)
            {
                throw new FormatException("Voting page has no h1 heading: " + URL);
            }

            var lis = mainContent.SelectNodes(".//li");

            var headingText    = HttpUtility.HtmlDecode(h1.First().InnerText);
            var scrapedNumbers = ScraperStringHelper.GetNumbersFromString(headingText);

            Console.WriteLine(headingText);

            if (scrapedNumbers.Count != 6)
            {
                throw new FormatException(string.Format(
                    "Expected 6 numbers in the voting heading but found {0}: \"{1}\"",
                    scrapedNumbers.Count, headingText));
            }

            var numbersAsInts = scrapedNumbers.Select(x => (int)x.Value).ToList();

            meetingNumber = scrapedNumbers.ElementAt(0).Value;
            votingNumber  = scrapedNumbers.ElementAt(1).Value;

            // Skips three characters past the last ':' in the heading and takes the rest.
            // NOTE(review): presumably this steps over the minutes of an "HH:mm" time - confirm.
            subject = headingText.Substring(headingText.LastIndexOf(":") + 3).Trim();

            // DateTime(year, month, day, hour, minute, second): the numbers at index 3, 2, 4 and 5
            // supply year, day, hour and minute; the month is derived from the heading text.
            when = new DateTime(
                numbersAsInts[3],
                czechCalendarHelper.getMonthFromString(headingText),
                numbersAsInts[2],
                numbersAsInts[4],
                numbersAsInts[5],
                0);

            pspVotes = new List <individualVote>();

            // A page without any <li> elements yields a null node set; treat it as "no votes".
            if (lis != null)
            {
                foreach (var LINode in lis)
                {
                    if (!isLINodeaVote(LINode))
                    {
                        continue;
                    }

                    // The last child supplies the member's name and link, the first child the flag class.
                    var parliamentMemberLinkNode = LINode.LastChild;
                    var name = HttpUtility.HtmlDecode(parliamentMemberLinkNode.InnerText);
                    var link = Scraper.pspHostAppURL + parliamentMemberLinkNode.Attributes["href"].Value;

                    var vote = new individualVote()
                    {
                        member = new parliamentMember {
                            name = name, pspUrl = link
                        }
                    };

                    // Unrecognised flag classes leave vote.how at its default value.
                    switch (LINode.FirstChild.Attributes["class"].Value)
                    {
                    case "flag yes":
                        vote.how = individualVotingTypes.Agrees;
                        break;

                    case "flag no":
                        vote.how = individualVotingTypes.Disagrees;
                        break;

                    case "flag not-logged-in":
                        vote.how = individualVotingTypes.NotPresent;
                        break;

                    case "flag refrained":
                        vote.how = individualVotingTypes.Refrained;
                        break;

                    case "flag excused":
                        vote.how = individualVotingTypes.NotPresentExcused;
                        break;
                    }

                    AddIndividualVote(vote);
                }
            }

            // Persisting the scraped votes to the document database is not implemented here.
            Console.WriteLine("Added {0} votes", pspVotes.Count);
        }