Exemple #1
0
        /// <summary>
        /// Downloads the Folkets Ting blog feed and mirrors every feed item into the
        /// BlogPosts table: items whose id is already stored are updated in place,
        /// unknown items are inserted.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            SyndicationFeed feed;
            // The feed is read completely by Load, so the reader can be released right away.
            using (var reader = XmlReader.Create("http://blog.folketsting.dk/feed/"))
            {
                feed = SyndicationFeed.Load<SyndicationFeed>(reader);
            }

            using (var db = new DBDataContext())
            {
                foreach (var item in feed.Items)
                {
                    var post = db.BlogPosts.SingleOrDefault(_ => _.WordpressId == item.Id);
                    if (post != null)
                    {
                        // Known post: refresh the fields that may have been edited.
                        post.Title = item.Title.Text;
                        post.Summary = item.Summary.Text;
                        post.PermaLink = item.Links[0].Uri.AbsoluteUri;
                    }
                    else
                    {
                        post = new BlogPost()
                        {
                            WordpressId = item.Id,
                            Title = item.Title.Text,
                            Summary = item.Summary.Text,
                            Date = item.PublishDate.DateTime,
                            PermaLink = item.Links[0].Uri.AbsoluteUri,
                        };
                        db.BlogPosts.InsertOnSubmit(post);
                    }

                    // One submit per item covers both the update and the insert branch.
                    db.SubmitChanges();
                }
            }

            Console.WriteLine("press the any key ...");
            Console.ReadKey();
        }
Exemple #2
0
        /// <summary>
        /// Returns the id of the stored document for <paramref name="url"/>, downloading
        /// and storing it (and uploading it to Scribd) when it is not in the database yet.
        /// </summary>
        /// <param name="url">Address of the document; trimmed and lower-cased before use as lookup key.</param>
        /// <param name="question">Question the document belongs to; passed on to the Scribd upload.</param>
        /// <returns>
        /// The DocumentId of the existing or newly stored document, or <c>null</c> when
        /// downloading, uploading or storing failed.
        /// </returns>
        public static int? DownloadDocument(string url, FT.DB.P20Question question)
        {
            //make sure url is normalized
            url = url.Trim().ToLower();

            using (var db = new DBDataContext())
            {
                // check to see if we already have document downloaded from this Uri
                var doc = db.Documents.SingleOrDefault(d => d.Uri == url);
                if (doc != null)
                {
                    return doc.DocumentId;
                }

                try
                {
                    FT.DB.Document newdoc = new DB.Document();

                    // Dispose the response so the underlying connection is released,
                    // and collect the body in a growing stream instead of re-copying
                    // the whole buffer for every chunk read.
                    using (HttpWebResponse resp = GetResponse(url, 0))
                    using (Stream body = resp.GetResponseStream())
                    using (MemoryStream buffer = new MemoryStream())
                    {
                        byte[] chunk = new byte[4096];
                        int read;
                        while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
                        {
                            buffer.Write(chunk, 0, read);
                        }

                        newdoc.Data = new Binary(buffer.ToArray());
                        newdoc.ContentType = resp.ContentType;
                    }

                    newdoc.Uri = url;

                    var scribdids = UpLoadToScribd(url, question);
                    newdoc.ScribdId = scribdids.Item1;
                    newdoc.ScribdAccessKey = scribdids.Item2;

                    db.Documents.InsertOnSubmit(newdoc);
                    db.SubmitChanges();

                    return newdoc.DocumentId;
                }
                catch (Exception ex)
                {
                    // Best effort: callers treat null as "no document available",
                    // but leave a trace so failures are not completely invisible.
                    Console.Error.WriteLine("DownloadDocument failed for " + url + ": " + ex.Message);
                    return null;
                }
            }
        }
        /// <summary>
        /// Removes every row from the Documents table and deletes the matching
        /// document on Scribd, except for one Scribd document that is left in place.
        /// </summary>
        public void DeleteAllDocs()
        {
            // Scribd document that is never deleted remotely.
            // NOTE(review): the reason this id is special is not visible here - confirm.
            const int keptScribdId = 35896972;

            using (var db = new DBDataContext())
            {
                foreach (var d in db.Documents)
                {
                    // Rows without a Scribd id have nothing to delete remotely;
                    // reading .Value on them would throw.
                    if (d.ScribdId.HasValue && d.ScribdId.Value != keptScribdId)
                    {
                        Scribd.Net.Document.Delete(d.ScribdId.Value);
                    }
                    db.Documents.DeleteOnSubmit(d);
                }
                db.SubmitChanges();
            }
        }
        /// <summary>
        /// Pages through all documents of the configured Scribd account and stores the
        /// Scribd access key on every matching database document that has none yet.
        /// </summary>
        public void AddAccessKeyToAllScribdDocs()
        {
            int pagesize = 100;

            // NOTE(review): credentials are hard-coded in source; they should be moved
            // to configuration and rotated.
            Scribd.Net.Service.APIKey = "6qoqzj285ftfmvddexcpb";
            Scribd.Net.Service.SecretKey = "sec-6hrkkevcf77mmn34uz73csjmo7";
            Scribd.Net.Service.EnforceSigning = true;
            Scribd.Net.Service.PublisherID = "pub-82439046238225493803";

            using (var db = new DBDataContext())
            {
                int offset = 0;
                while (true)
                {
                    List<Scribd.Net.Document> res =
                        Scribd.Net.Document.GetList(Scribd.Net.Service.User, pagesize, offset, false);

                    // An empty (or missing) page means the listing is exhausted.
                    if (res == null || res.Count == 0)
                    {
                        break;
                    }

                    foreach (var doc in res)
                    {
                        // The account may hold documents that are not in the database;
                        // skip those instead of aborting the whole run.
                        var dbdoc = db.Documents.SingleOrDefault(d => d.ScribdId == doc.DocumentId);
                        if (dbdoc != null && dbdoc.ScribdAccessKey == null)
                        {
                            dbdoc.ScribdAccessKey = doc.AccessKey;
                        }
                    }

                    offset += res.Count;
                }

                db.SubmitChanges();
            }
        }
Exemple #5
0
        /// <summary>
        /// Geocodes every committee trip that has a place name but no stored
        /// destinations yet, and saves one destination row per place that could be
        /// resolved. A place such as "A, B og C" is treated as several destinations.
        /// </summary>
        public static void GeoCode()
        {
            using (var db = new DBDataContext())
            {
                foreach (var trip in db.CommitteeTrips.Where(_ => !_.CommitteeTripDestinations.Any()))
                {
                    if (string.IsNullOrEmpty(trip.Place))
                    {
                        continue;
                    }

                    Console.WriteLine("geocoding " + trip.Place);
                    string[] dests;
                    if (trip.Place.Contains(" og "))
                    {
                        // Split on the word " og " (with its surrounding spaces), not on the
                        // bare letters "og", which would cut names such as "Togo" in two.
                        dests = trip.Place.Split(
                            new string[] { ",", " og " }, StringSplitOptions.RemoveEmptyEntries)
                            .Select(_ => _.Trim())
                            .Where(_ => _.Length > 0)
                            .ToArray();
                    }
                    else
                    {
                        dests = new string[] { trip.Place };
                    }

                    var tripid = trip.CommitteeTripId;
                    // Materialize so each place is geocoded exactly once.
                    var destinations = dests
                        .Select(_ => new { pos = Geo.Geocoder.GeoCode(_, false), place = _ })
                        .Where(_ => _.pos != null)
                        .Select(_ => new CommitteeTripDestination
                        {
                            CommitteeTripId = tripid,
                            Lat = _.pos.Lat,
                            Lng = _.pos.Lng,
                            PlaceNameName = _.place.Trim()
                        })
                        .ToList();
                    db.CommitteeTripDestinations.InsertAllOnSubmit(destinations);
                }
                db.SubmitChanges();
            }

            Console.WriteLine("all done");
        }
        /// <summary>
        /// Handles the registration form: creates the membership account, signs the
        /// user in, stores the matching application user row and redirects.
        /// </summary>
        /// <param name="userName">Requested user name.</param>
        /// <param name="email">E-mail address of the new user.</param>
        /// <param name="password">Requested password.</param>
        /// <param name="confirmPassword">Repeated password, checked by the validation step.</param>
        /// <param name="returnurl">
        /// Optional URL-encoded address to return to after registration. Only local
        /// addresses are honoured; anything else falls back to the home page.
        /// </param>
        /// <returns>
        /// A redirect on success, otherwise the registration view with model errors.
        /// </returns>
        public ActionResult Register(string userName, string email, string password,
            string confirmPassword, string returnurl)
        {
            ViewData["PasswordLength"] = MembershipService.MinPasswordLength;

            if (ValidateRegistration(userName, email, password, confirmPassword))
            {
                // Attempt to register the user
                MembershipCreateStatus createStatus =
                    MembershipService.CreateUser(userName, password, email);

                if (createStatus == MembershipCreateStatus.Success)
                {
                    FormsAuth.SignIn(userName, false /* createPersistentCookie */);

                    // also create our user
                    using (var db = new DBDataContext())
                    {
                        db.Users.InsertOnSubmit(new User()
                        {
                            Username = userName,
                            CreatedOn = DateTime.Now,
                        });
                        db.SubmitChanges();
                    }

                    if (!string.IsNullOrEmpty(returnurl))
                    {
                        // returnurl comes from the request; never redirect off-site.
                        string target = Server.UrlDecode(returnurl);
                        if (IsLocalRedirectTarget(target))
                        {
                            return Redirect(target);
                        }
                    }

                    return RedirectToAction("Index", "Home");
                }

                ModelState.AddModelError("_FORM", ErrorCodeToString(createStatus));
            }

            // If we got this far, something failed, redisplay form
            return View();
        }

        /// <summary>
        /// Tells whether a redirect target stays on this site: "/path" or "~/path",
        /// but not "//host" or "/\host", which browsers resolve to another host.
        /// </summary>
        private static bool IsLocalRedirectTarget(string target)
        {
            if (string.IsNullOrEmpty(target))
            {
                return false;
            }

            if (target[0] == '/')
            {
                return target.Length == 1 || (target[1] != '/' && target[1] != '\\');
            }

            return target.Length > 1 && target[0] == '~' && target[1] == '/';
        }
        /// <summary>
        /// Handles the API-key request form. Validates the CAPTCHA result and the
        /// e-mail address, and on success stores the user with a freshly generated,
        /// unused API key before redirecting to the "Created" page.
        /// </summary>
        /// <param name="user">Form-bound API user (e-mail address and intended use).</param>
        /// <param name="captchaValid">Whether the CAPTCHA was answered correctly.</param>
        /// <returns>
        /// The "New" view with validation errors, or a redirect to "Created" carrying the new key.
        /// </returns>
        public ActionResult New(
            [Bind(Prefix = "ApiUser", Include = "EmailAddress,IntendedUse")]
            ApiUser user,
            bool captchaValid
            )
        {
            var errors = ViewData.ModelState;

            if (!captchaValid)
                errors.AddModelError("captcha", "CAPTCHA forkert");

            if (string.IsNullOrEmpty(user.EmailAddress))
                errors.AddModelError("ApiUser.EmailAddress", "Ingen email adresse angivet");

            var context = new DBDataContext();
            bool emailTaken = context.ApiUsers.Any(_ => _.EmailAddress == user.EmailAddress);
            if (emailTaken)
                errors.AddModelError("ApiUser.EmailAddress", "Email adresse findes allerede, skriv til [email protected] hvis du glemt nøgle");

            if (!ModelState.IsValid)
            {
                // Validation failed: show the form again with the submitted values.
                var crumbs = new List<Breadcrumb>();
                crumbs.Add(Breadcrumb.Home);

                var retryModel = new NewApiUserViewModel
                {
                    Breadcrumb = crumbs,
                    MetaDescription = "Lav API-nøgle til Folkets Ting API",
                    ApiUser = user,
                };
                return View("New", retryModel);
            }

            // Keep generating keys until one is found that no existing user holds.
            string freshKey;
            do
            {
                freshKey = GetKey(apiKeyLength);
            }
            while (context.ApiUsers.Any(_ => _.ApiKey == freshKey));

            user.ApiKey = freshKey;
            user.CreatedDate = DateTime.Now;

            context.ApiUsers.InsertOnSubmit(user);
            context.SubmitChanges();

            return RedirectToAction("Created", "ApiRegistration", new { key = freshKey });
        }
Exemple #8
0
        /// <summary>
        /// Processes one row of the committee trip listing: reads dates, committee,
        /// place and purpose from the row, fetches the trip's detail page for
        /// participants, budget and actual spend, and inserts or updates the
        /// CommitteeTrip together with its participant rows.
        /// Cancelled trips and trips without listed politicians are skipped.
        /// </summary>
        /// <param name="row">
        /// Listing row whose onclick attribute carries the detail page URL and whose
        /// cells hold start date (0), end date (1), committee (2), place (3) and purpose (5).
        /// </param>
        private static void HandleRow(HtmlNode row)
        {
            // The detail URL is the first single-quoted part of the onclick handler,
            // and the FT id is the part of that URL wrapped in curly braces.
            var onclick = row.Attributes["onclick"].Value;
            var url = onclick.Split('\'')[1];
            string ftid = url.Split('{')[1].Replace("}", "");

            var db = new DBDataContext();
            var trip = db.CommitteeTrips.SingleOrDefault(_ => _.FTId == ftid);

            var cells = row.SelectHtmlNodes("td");

            var startstring = cells.ElementAt(0).InnerText.Trim();
            var startdate = DateTime.ParseExact(startstring, "dd-MM-yyyy", null);

            var endstring = cells.ElementAt(1).InnerText.Trim();
            var enddate = DateTime.ParseExact(endstring, "dd-MM-yyyy", null);

            // Dates before 1900 are treated as invalid rows.
            if (startdate.Year < 1900 || enddate.Year < 1900)
            {
                return;
            }

            var commname = cells.ElementAt(2).InnerText.Trim();

            var committee = Scrape2009.GetCommitteeId(commname, db);

            var purpose = cells.ElementAt(5).InnerText.Trim();
            var place = cells.ElementAt(3).InnerText.Trim();
            if (place.ToLower().Contains("aflyst") || purpose.ToLower().Contains("aflyst"))
            {
                // trip is marked as cancelled ("aflyst"), give up
                return;
            }

            var doc = Scrape2009.GetDoc("http://www.ft.dk" + url);

            var menudiv = doc.DocumentNode.SelectSingleNode("//div[@id='menuSkip']");
            if (menudiv == null)
            {
                // detail page does not have the expected content container, nothing to read
                return;
            }

            if (menudiv.InnerText.ToLower().Contains("afbud"))
            {
                return;
            }

            var participantnode = menudiv.SelectHtmlNodes("p/h3").
                SingleOrDefault(_ => _.InnerText.Trim() == "Deltagere");

            if (participantnode == null)
            {
                // no politicians went, discard
                return;
            }

            var participants = participantnode.
                NextSibling.SelectHtmlNodes("li/a").
                Select(_ => _.Attributes["href"].Value);

            // Lazy on purpose: the lookups are run once, inside the lock below.
            var polidquery = participants.Select(_ => Scrape2009.GetPoliticianByUrl(_, db));

            var otherparticipantnode = menudiv.SelectHtmlNodes("p/h3").
                SingleOrDefault(_ => _.InnerText.Trim() == "Øvrige deltagere");

            var othercount = 0;
            if (otherparticipantnode != null)
            {
                othercount = OtherMemberCount(otherparticipantnode);
            }

            // Amounts: first space-separated token with '.' and ',' removed.
            // NOTE(review): removing ',' also drops a decimal separator if the page
            // ever shows decimals - confirm the amounts are always whole numbers.
            var budgetstring = menudiv.SelectHtmlNodes("p/h3").
                Single(_ => _.InnerText.Trim() == "Budget").
                NextSibling.InnerText.Trim().Split(' ')[0]
                .Replace(".", "").Replace(",", "");

            var spendstring = menudiv.SelectHtmlNodes("p/h3").
                Single(_ => _.InnerText.Trim() == "Regnskab").
                NextSibling.InnerText.Trim().Split(' ')[0]
                .Replace(".", "").Replace(",", "");

            var provider = new CultureInfo("da-dk");
            var budget = decimal.Parse(budgetstring, provider);
            var spend = decimal.Parse(spendstring, provider);

            lock (dblock)
            {
                if (trip == null)
                {
                    trip = new CommitteeTrip();
                    db.CommitteeTrips.InsertOnSubmit(trip);
                }

                trip.ActualExpenses = spend;
                trip.Budget = budget;
                trip.CommitteeId = committee;
                trip.EndDate = enddate;
                trip.Place = place;
                trip.Purpose = purpose;
                trip.StartDate = startdate;
                trip.NonPolParticipants = othercount;
                trip.FTId = ftid;
                trip.Uri = url;

                // Submit first so a new trip has its id before participants reference it.
                db.SubmitChanges();

                // Resolve the politicians exactly once. The lazy query would otherwise be
                // re-run for every existing participant in the Contains check below and
                // again for the insert, repeating each politician lookup.
                var polids = polidquery.ToList();

                var newpols = polids.Where(_ =>
                    _.HasValue &&
                    !trip.CommitteeTripParticipants.Any(p => p.Politician.PoliticianId == _.Value)
                    );

                var tripparticipants = newpols.Select(_ =>
                    new CommitteeTripParticipant
                    {
                        CommitteeTripId = trip.CommitteeTripId,
                        ParticipantId = _.Value
                    });

                // Participants stored earlier that are no longer listed get removed.
                var partstodelete = trip.CommitteeTripParticipants.
                    Where(_ => !polids.Contains(_.ParticipantId));
                db.CommitteeTripParticipants.DeleteAllOnSubmit(partstodelete);

                db.CommitteeTripParticipants.InsertAllOnSubmit(tripparticipants);
                db.SubmitChanges();
            }
        }
Exemple #9
0
        /// <summary>
        /// Scrapes one question page. Creates the P20Question (asker, askee, minister
        /// title, background, ask date, committees) when it is not stored yet, and
        /// then tries to pick up the answer date and answer document.
        /// Questions that already have an answer date are left untouched.
        /// </summary>
        /// <param name="ftid">FT number of the question; together with the session it identifies the question.</param>
        /// <param name="title">Question text, stored in the Question field of a new question.</param>
        /// <param name="commiteestrings">Names of the committees to link a new question to.</param>
        /// <param name="answered">Whether the listing shows the question as answered.</param>
        /// <param name="url">Address of the question page, appended to <c>Scrape2009.fastdomain</c>.</param>
        /// <param name="samling">Session the question belongs to.</param>
        /// <param name="record">When false, no changes are submitted to the database.</param>
        public static void GetQ(int ftid, string title, IEnumerable<string> commiteestrings,
            bool answered, string url, Session samling, bool record = true)
        {
            var db = new DBDataContext();
            // check to see if we have this one and if it's answered
            var question = db.P20Questions.SingleOrDefault(
                _ => _.SessionId == samling.SessionId && _.FTId == ftid);

            if (question != null && question.AnswerDate.HasValue)
            {
                // we're done here
                return;
            }

            if (ftid == 2370)
            {
                // question is borked, ignore
                return;
            }

            // ok, follow the link
            HtmlDocument doc = Scrape2009.GetDoc(Scrape2009.fastdomain + url);

            if (question == null)
            {
                // create a new one
                // get asker, askee short title and background
                var shortitle = doc.DocumentNode.SelectSingleNode("//div[@id='menuSkip']/h1").
                    InnerText.Split(new string[] { ftid.ToString() }, StringSplitOptions.None)[1].Trim().Trim('.');

                var pasker = doc.SelectHtmlNodes("//div[@id='menuSkip']/p").SingleOrDefault(
                    _ => _.InnerText.Trim().ToLower().StartsWith("af "));
                if (pasker == null)
                {
                    // we have to do this because of this question with no asker:
                    // http://www.ft.dk/samling/20091/spoergsmaal/S445/index.htm
                    return;
                }

                var politicianAnchors = pasker.SelectNodes("a").OfType<HtmlNode>();

                // The asker is the first link whose text has the form "Name (Party)".
                var askerPoliticianNameAndParty = politicianAnchors
                    .Where(x => x.InnerText.Contains("("))
                    .First().InnerText;
                var askerName = askerPoliticianNameAndParty.Split('(')[0].Trim();
                var askerParty = askerPoliticianNameAndParty.Split('(')[1].Replace(")", "").Trim();

                var asker =
                    Scrape2009.GetPoliticianByNameAndParty(askerName, askerParty, db)
                    .PoliticianId;

                // get the relevant minister
                var minregex = new Regex(@"Til[ \t]*(?'tit'[\w\s-]*)<br>");
                var match = minregex.Matches(pasker.InnerHtml);
                if (match.Count < 1)
                {
                    // might be an incomplete question, just return
                    return;
                }
                string ministertitle = match[0].Groups["tit"].Value.Trim();

                // The askee is the last link after the first one.
                var askeeePoliticianNameAndParty = politicianAnchors
                    .Skip(1).Last().InnerText;
                var askeeName = askeeePoliticianNameAndParty.Split('(')[0].Trim();

                int? askee = null;
                if (!askeeePoliticianNameAndParty.Contains("("))
                {
                    // sometimes the party is not listed with name
                    askee = Scrape2009.GetPoliticianByName(askeeName, db).PoliticianId;
                }
                else
                {
                    var askeeParty = askeeePoliticianNameAndParty.Split('(')[1].Replace(")", "").Trim();
                    askee = Scrape2009.GetPoliticianByNameAndParty(askeeName,
                        askeeParty, db).PoliticianId;
                }

                var pbackground = doc.SelectHtmlNodes("//div[@id='menuSkip']/p").SingleOrDefault(
                    _ => _.InnerText.Trim().ToLower().StartsWith("skriftlig begrundelse"));

                string backgroundtext = null;
                if (pbackground != null)
                {
                    // Everything after the first <br>-separated part (the heading) is the
                    // background text. The joined result must be assigned - it used to be
                    // computed and thrown away, leaving Background always null.
                    var backgroundparts = pbackground.InnerHtml.
                         Split(new string[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries).
                         Skip(1).Select(_ => _.Trim()).ToArray();
                    if (backgroundparts.Length > 0)
                    {
                        backgroundtext = string.Join(" ", backgroundparts);
                    }
                }

                var datereg = new Regex(@"<br>(?'day'\d\d)-(?'mon'\d\d)-(?'yea'\d\d\d\d)<br>");
                var datematch = datereg.Matches(pasker.InnerHtml);
                var askdate = new DateTime(
                    int.Parse(datematch[0].Groups["yea"].Value.Trim()),
                    int.Parse(datematch[0].Groups["mon"].Value.Trim()),
                    int.Parse(datematch[0].Groups["day"].Value.Trim())
                    );

                question = new P20Question
                {
                    AskeeId = askee,
                    AskerPolId = asker,
                    Title = shortitle,
                    Background = backgroundtext,
                    Type = QuestionType.Politician,
                    FTId = ftid,
                    Question = title,
                    AskeeTitle = ministertitle,
                    SessionId = samling.SessionId,
                    AskDate = askdate,
                };
                db.P20Questions.InsertOnSubmit(question);
                if (record)
                {
                    db.SubmitChanges();
                }

                var committees = db.Committees.Where(_ => commiteestrings.Contains(_.Name));

                db.ItemCommittees.InsertAllOnSubmit(committees.ToList().Select(_ =>
                    new ItemCommittee
                    {
                        CommitteeId = _.CommitteeId,
                        ItemId = question.P20QuestionId,
                        ItemType = 1
                    }
                    ));
                if (record)
                {
                    db.SubmitChanges();
                }
            }

            if (question != null && (!question.AnswerDate.HasValue || !answered))
            {
                // ok, try to get the answer, it should be there since the question looks answered
                // first, the date
                // have to do last due to this one
                // http://www.ft.dk/samling/20091/spoergsmaal/S2566/index.htm

                var dateps = doc.SelectHtmlNodes("//p[@style='padding-left:10px;']");
                if (dateps.Any())
                {
                    var datep = dateps.Last();

                    var receivedregex =
                        new Regex(@"Modtaget: (?'day'\d\d)-(?'mon'\d\d)-(?'yea'\d\d\d\d)<br>");
                    var rdatematch = receivedregex.Matches(datep.InnerHtml);
                    var answer = new DateTime(
                        int.Parse(rdatematch[0].Groups["yea"].Value.Trim()),
                        int.Parse(rdatematch[0].Groups["mon"].Value.Trim()),
                        int.Parse(rdatematch[0].Groups["day"].Value.Trim())
                        );

                    var tablewithanswerlink = doc.SelectHtmlNodes("//table[@class='lovTable']").Last();

                    // Recognizes the table row that holds the answer by the wording of its text.
                    Func<string, bool> answerrowfinder = _ =>
                            _.StartsWith("Svar:") ||
                            _.StartsWith("Svar :") ||
                            _.StartsWith("Svar (endeligt):") ||
                            _.StartsWith("Endeligt svar") ||
                            _.StartsWith("Svar på") ||
                            _.StartsWith("Supplerende svar på") ||
                            _.StartsWith("UDKASTspg") ||
                            _.ToLower().Contains("besvarelse") ||
                            _.ToLower().Contains("svar på") ||
                            _.StartsWith("S ");

                    var rowwithcrapanswerlink = tablewithanswerlink.SelectHtmlNodes("tbody/tr").
                        SingleOrDefault(_ => answerrowfinder(_.InnerText));
                    if (rowwithcrapanswerlink == null)
                    {
                        // apparently not quite ready yet
                    }
                    else
                    {
                        var craplinkurl = rowwithcrapanswerlink.SelectHtmlNodes("td/ul/li/a").
                            Single().Attributes["href"].Value;

                        var crapdoc = Scrape2009.GetDoc(craplinkurl);
                        // we do last due to this one
                        // http://www.ft.dk/samling/20091/spoergsmaal/s2695/svar/737831/index.htm#dok
                        var answerrow = crapdoc.SelectHtmlNodes("//table[@class='lovTable']/tbody/tr").
                            LastOrDefault(_ => answerrowfinder(_.InnerText.Replace("  ", " ")));
                        if (answerrow == null)
                        {
                            // due to weirdness here: http://www.ft.dk/samling/20091/spoergsmaal/s536/svar/669456/index.htm#dok
                            answerrow = crapdoc.SelectHtmlNodes("//table[@class='lovTable']/tbody/tr").First();
                        }
                        var answerlink = answerrow.SelectHtmlNodes("td/a").
                            Single(_ =>
                                _.InnerText.Trim().StartsWith("Html-version")).Attributes["href"].Value;
                        var docanswerlink = answerrow.SelectHtmlNodes("td/div/div/ul/li/a").
                            First().Attributes["href"].Value;

                        if (docanswerlink.Contains("founded"))
                        {
                            throw new ArgumentException("no such pdf for " + ftid);
                        }

                        var answerdocid = Util.DownloadDocument(docanswerlink, question);
                        if (answerdocid != null)
                        {
                            question.AnswerDocumentId = answerdocid;
                            question.AnswerDate = answer;
                        }
                        else
                        {
                            // apparently something went wrong when downloading doc, disregard
                        }
                    }
                }
                else
                {
                    // hmm, looks like it's not actually answered for reals
                }

            }
            if (record)
            {
                db.SubmitChanges();
            }
        }