/// <summary>
/// Console entry point: loads the blog feed and mirrors every feed item into the BlogPosts table.
/// </summary>
/// <remarks>
/// Feed items are matched against existing rows on WordpressId (the feed item id). Existing rows get
/// their title, summary and permalink refreshed; unknown items are inserted together with their
/// publish date. Changes are submitted once per feed item.
/// </remarks>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    SyndicationFeed feed;

    // The reader is only needed while the feed is parsed; dispose it right after.
    using (var reader = XmlReader.Create("http://blog.folketsting.dk/feed/"))
    {
        feed = SyndicationFeed.Load<SyndicationFeed>(reader);
    }

    using (var db = new DBDataContext())
    {
        foreach (var item in feed.Items)
        {
            var post = db.BlogPosts.SingleOrDefault(_ => _.WordpressId == item.Id);
            if (post != null)
            {
                // known post: refresh the fields that may have been edited
                post.Title = item.Title.Text;
                post.Summary = item.Summary.Text;
                post.PermaLink = item.Links[0].Uri.AbsoluteUri;
            }
            else
            {
                post = new BlogPost()
                {
                    WordpressId = item.Id,
                    Title = item.Title.Text,
                    Summary = item.Summary.Text,
                    Date = item.PublishDate.DateTime,
                    PermaLink = item.Links[0].Uri.AbsoluteUri,
                };
                db.BlogPosts.InsertOnSubmit(post);
            }

            // single submit covers both the update and the insert branch
            db.SubmitChanges();
        }
    }

    Console.WriteLine("press the any key ...");
    Console.ReadKey();
}
/// <summary>
/// Downloads the document at <paramref name="url"/>, uploads it to Scribd and stores it in the
/// Documents table, unless a document with the same (normalized) url is already stored.
/// </summary>
/// <param name="url">Address of the document. It is trimmed and lower-cased before use, both as
/// lookup key and as download address.</param>
/// <param name="question">Question the document belongs to; handed on to the Scribd upload.</param>
/// <returns>
/// The DocumentId of the already stored or newly stored document, or <c>null</c> when downloading,
/// uploading or storing failed.
/// </returns>
public static int? DownloadDocument(string url, FT.DB.P20Question question)
{
    // make sure url is normalized
    url = url.Trim().ToLower();

    using (var db = new DBDataContext())
    {
        // check to see if we already have document downloaded from this Uri
        var doc = db.Documents.SingleOrDefault(d => d.Uri == url);
        if (doc != null)
        {
            return doc.DocumentId;
        }

        try
        {
            byte[] content;
            string contentType;

            // Dispose response and stream on every path so the connection is released.
            using (HttpWebResponse resp = GetResponse(url, 0))
            using (Stream body = resp.GetResponseStream())
            using (var buffer = new MemoryStream())
            {
                body.CopyTo(buffer);
                content = buffer.ToArray();
                contentType = resp.ContentType;
            }

            FT.DB.Document newdoc = new DB.Document();
            newdoc.Data = new Binary(content);
            newdoc.ContentType = contentType;
            newdoc.Uri = url;

            var scribdids = UpLoadToScribd(url, question);
            newdoc.ScribdId = scribdids.Item1;
            newdoc.ScribdAccessKey = scribdids.Item2;

            db.Documents.InsertOnSubmit(newdoc);
            db.SubmitChanges();
            return newdoc.DocumentId;
        }
        catch (Exception)
        {
            // Deliberately best-effort: any failure is reported to the caller as "no document".
            return null;
        }
    }
}
/// <summary>
/// Deletes every row of the Documents table and removes the corresponding documents from Scribd.
/// </summary>
/// <remarks>
/// One Scribd document id is exempt from the remote delete; its database row is still removed.
/// Rows without a Scribd id are only removed from the database.
/// </remarks>
public void DeleteAllDocs()
{
    // NOTE(review): why this particular Scribd document is kept is not visible here - confirm.
    const int keptScribdId = 35896972;

    using (var db = new DBDataContext())
    {
        foreach (var d in db.Documents)
        {
            // A row that never got a Scribd id has nothing to delete remotely
            // (reading .Value on it would throw).
            if (d.ScribdId.HasValue && d.ScribdId.Value != keptScribdId)
            {
                Scribd.Net.Document.Delete(d.ScribdId.Value);
            }

            db.Documents.DeleteOnSubmit(d);
        }

        db.SubmitChanges();
    }
}
/// <summary>
/// Pages through all documents of the Scribd account and copies each document's access key onto
/// the matching Documents row when that row has no access key yet.
/// </summary>
/// <remarks>
/// Scribd documents without a matching database row are skipped. All updates are submitted in one
/// go after the last page has been processed.
/// </remarks>
public void AddAccessKeyToAllScribdDocs()
{
    const int pagesize = 100;

    // NOTE(review): credentials are hard-coded in source; they should come from configuration
    // and be rotated. Left unchanged here to keep behavior identical.
    Scribd.Net.Service.APIKey = "6qoqzj285ftfmvddexcpb";
    Scribd.Net.Service.SecretKey = "sec-6hrkkevcf77mmn34uz73csjmo7";
    Scribd.Net.Service.EnforceSigning = true;
    Scribd.Net.Service.PublisherID = "pub-82439046238225493803";

    using (var db = new DBDataContext())
    {
        List<Scribd.Net.Document> res = null;
        int offset = 0;
        do
        {
            res = Scribd.Net.Document.GetList(Scribd.Net.Service.User, pagesize, offset, false);
            foreach (var doc in res)
            {
                // The account can hold documents we have no row for (DeleteAllDocs keeps one on
                // Scribd while deleting its row), so a missing row must not abort the run.
                var dbdoc = db.Documents.SingleOrDefault(d => d.ScribdId == doc.DocumentId);
                if (dbdoc == null)
                {
                    continue;
                }

                if (dbdoc.ScribdAccessKey == null)
                {
                    dbdoc.ScribdAccessKey = doc.AccessKey;
                }
            }

            offset += res.Count;
        } while (res.Count > 0); // an empty page means we are past the last document

        db.SubmitChanges();
    }
}
/// <summary>
/// Geocodes the place of every committee trip that has no destinations yet and stores one
/// CommitteeTripDestination per successfully geocoded place.
/// </summary>
/// <remarks>
/// A place text containing " og " is treated as a list and split on commas and on the word
/// " og "; any other place text is geocoded as a single destination. Places the geocoder
/// returns null for are skipped. All new destinations are submitted together at the end.
/// </remarks>
public static void GeoCode()
{
    using (var db = new DBDataContext())
    {
        foreach (var trip in db.CommitteeTrips.Where(_ => !_.CommitteeTripDestinations.Any()))
        {
            if (string.IsNullOrEmpty(trip.Place))
            {
                continue;
            }

            Console.WriteLine("geocoding " + trip.Place);

            string[] dests;
            if (trip.Place.Contains(" og "))
            {
                // Split on the word " og " including its surrounding spaces; splitting on bare
                // "og" would cut place names that merely contain those letters (e.g. "Bogota").
                dests = trip.Place
                    .Split(new string[] { ",", " og " }, StringSplitOptions.RemoveEmptyEntries)
                    .Select(_ => _.Trim())
                    .Where(_ => _.Length > 0)
                    .ToArray();
            }
            else
            {
                dests = new string[] { trip.Place };
            }

            // Materialize inside the iteration so the lambdas never outlive the current trip.
            var destinations = dests
                .Select(_ => new { pos = Geo.Geocoder.GeoCode(_, false), place = _ })
                .Where(_ => _.pos != null)
                .Select(_ => new CommitteeTripDestination
                {
                    CommitteeTripId = trip.CommitteeTripId,
                    Lat = _.pos.Lat,
                    Lng = _.pos.Lng,
                    PlaceNameName = _.place.Trim()
                })
                .ToList();

            db.CommitteeTripDestinations.InsertAllOnSubmit(destinations);
        }

        db.SubmitChanges();
    }

    Console.WriteLine("all done");
}
/// <summary>
/// Handles the registration form: creates the membership account, signs the user in, creates the
/// application's own User row and redirects.
/// </summary>
/// <param name="userName">Requested user name.</param>
/// <param name="email">Email address of the new user.</param>
/// <param name="password">Requested password.</param>
/// <param name="confirmPassword">Repeated password, checked by ValidateRegistration.</param>
/// <param name="returnurl">Optional (url-encoded) address to return to after registering. Only
/// site-local addresses are honoured; anything else falls back to the home page.</param>
/// <returns>
/// A redirect on success; otherwise the registration view with the model errors set.
/// </returns>
public ActionResult Register(string userName, string email, string password, string confirmPassword, string returnurl)
{
    ViewData["PasswordLength"] = MembershipService.MinPasswordLength;

    if (ValidateRegistration(userName, email, password, confirmPassword))
    {
        // Attempt to register the user
        MembershipCreateStatus createStatus = MembershipService.CreateUser(userName, password, email);

        if (createStatus == MembershipCreateStatus.Success)
        {
            FormsAuth.SignIn(userName, false /* createPersistentCookie */);

            // also create our user
            using (var db = new DBDataContext())
            {
                db.Users.InsertOnSubmit(new User()
                {
                    Username = userName,
                    CreatedOn = DateTime.Now,
                });
                db.SubmitChanges();
            }

            if (!string.IsNullOrEmpty(returnurl))
            {
                string target = Server.UrlDecode(returnurl);

                // returnurl comes straight from the request: only follow it when it points into
                // this site ("/path" or "~/path", but not "//host" or "/\host"), otherwise an
                // attacker could use the registration link as an open redirect.
                bool rooted = !string.IsNullOrEmpty(target)
                    && target[0] == '/'
                    && (target.Length == 1 || (target[1] != '/' && target[1] != '\\'));
                bool appRelative = !string.IsNullOrEmpty(target)
                    && target.Length > 1
                    && target[0] == '~'
                    && target[1] == '/';

                if (rooted || appRelative)
                {
                    return Redirect(target);
                }
            }

            return RedirectToAction("Index", "Home");
        }

        ModelState.AddModelError("_FORM", ErrorCodeToString(createStatus));
    }

    // If we got this far, something failed, redisplay form
    return View();
}
/// <summary>
/// Handles the "new API user" form: validates the input, and on success generates a unique API key,
/// stores the user and redirects to the "Created" action with that key.
/// </summary>
/// <param name="user">Bound form data; only EmailAddress and IntendedUse are taken from the request.</param>
/// <param name="captchaValid">Whether the CAPTCHA was answered correctly.</param>
/// <returns>The "New" view with validation errors, or a redirect to "Created" on success.</returns>
public ActionResult New(
    [Bind(Prefix = "ApiUser", Include = "EmailAddress,IntendedUse")] ApiUser user,
    bool captchaValid
    )
{
    var errors = ViewData.ModelState;

    if (!captchaValid)
    {
        errors.AddModelError("captcha", "CAPTCHA forkert");
    }

    if (string.IsNullOrEmpty(user.EmailAddress))
    {
        errors.AddModelError("ApiUser.EmailAddress", "Ingen email adresse angivet");
    }

    var db = new DBDataContext();
    bool emailTaken = db.ApiUsers.Any(existing => existing.EmailAddress == user.EmailAddress);
    if (emailTaken)
    {
        errors.AddModelError("ApiUser.EmailAddress",
            "Email adresse findes allerede, skriv til [email protected] hvis du glemt nøgle");
    }

    if (ModelState.IsValid)
    {
        // we have a live one: keep drawing keys until one is not in use yet
        string apikey;
        do
        {
            apikey = GetKey(apiKeyLength);
        } while (db.ApiUsers.Any(existing => existing.ApiKey == apikey));

        user.ApiKey = apikey;
        user.CreatedDate = DateTime.Now;
        db.ApiUsers.InsertOnSubmit(user);
        db.SubmitChanges();

        return RedirectToAction("Created", "ApiRegistration", new { key = apikey });
    }

    // validation failed: show the form again with what the user entered
    var retryModel = new NewApiUserViewModel
    {
        Breadcrumb = new List<Breadcrumb> { Breadcrumb.Home, },
        MetaDescription = "Lav API-nøgle til Folkets Ting API",
        ApiUser = user,
    };
    return View("New", retryModel);
}
/// <summary>
/// Scrapes one row of the committee trip overview plus the trip's detail page, and inserts or
/// updates the matching CommitteeTrip together with its politician participants.
/// </summary>
/// <remarks>
/// The row is ignored when a date lies before year 1900, when place or purpose mention "aflyst",
/// when the detail page mentions "afbud", or when the detail page has no "Deltagere" section.
/// The database writes are done while holding <c>dblock</c>.
/// </remarks>
/// <param name="row">Overview table row; its onclick attribute carries the detail page url and
/// its cells carry dates, committee, place and purpose.</param>
private static void HandleRow(HtmlNode row)
{
    // The url is the text between the first pair of single quotes in the onclick handler;
    // the trip id is what follows the first '{' in that url, with '}' removed.
    var onclick = row.Attributes["onclick"].Value;
    var url = onclick.Split('\'')[1];
    string ftid = url.Split('{')[1].Replace("}", "");

    using (var db = new DBDataContext())
    {
        var trip = db.CommitteeTrips.SingleOrDefault(_ => _.FTId == ftid);

        var cells = row.SelectHtmlNodes("td");

        var startstring = cells.ElementAt(0).InnerText.Trim();
        var startdate = DateTime.ParseExact(startstring, "dd-MM-yyyy", null);
        var endstring = cells.ElementAt(1).InnerText.Trim();
        var enddate = DateTime.ParseExact(endstring, "dd-MM-yyyy", null);
        if (startdate.Year < 1900 || enddate.Year < 1900)
        {
            return;
        }

        var commname = cells.ElementAt(2).InnerText.Trim();
        var committee = Scrape2009.GetCommitteeId(commname, db);

        var purpose = cells.ElementAt(5).InnerText.Trim();
        var place = cells.ElementAt(3).InnerText.Trim();
        if (place.ToLower().Contains("aflyst") || purpose.ToLower().Contains("aflyst"))
        {
            // give up
            return;
        }

        var doc = Scrape2009.GetDoc("http://www.ft.dk" + url);
        var menudiv = doc.DocumentNode.SelectSingleNode("//div[@id='menuSkip']");
        if (menudiv.InnerText.ToLower().Contains("afbud"))
        {
            return;
        }

        var participantnode = menudiv.SelectHtmlNodes("p/h3").
            SingleOrDefault(_ => _.InnerText.Trim() == "Deltagere");
        if (participantnode == null)
        {
            // no politicians went, discard
            return;
        }

        // Lazy on purpose: the politician lookups only run once we know the row is usable.
        var participants = participantnode.
            NextSibling.SelectHtmlNodes("li/a").
            Select(_ => _.Attributes["href"].Value);
        var polids = participants.Select(_ => Scrape2009.GetPoliticianByUrl(_, db));

        var otherparticipantnode = menudiv.SelectHtmlNodes("p/h3").
            SingleOrDefault(_ => _.InnerText.Trim() == "Øvrige deltagere");
        var othercount = 0;
        if (otherparticipantnode != null)
        {
            othercount = OtherMemberCount(otherparticipantnode);
        }

        // NOTE(review): both '.' and ',' are stripped before parsing, so an amount written with
        // decimals would be read 100 times too large - confirm the page only lists whole amounts.
        var budgetstring = menudiv.SelectHtmlNodes("p/h3").
            Single(_ => _.InnerText.Trim() == "Budget").
            NextSibling.InnerText.Trim().Split(' ')[0]
            .Replace(".", "").Replace(",", "");
        var spendstring = menudiv.SelectHtmlNodes("p/h3").
            Single(_ => _.InnerText.Trim() == "Regnskab").
            NextSibling.InnerText.Trim().Split(' ')[0]
            .Replace(".", "").Replace(",", "");

        var provider = new CultureInfo("da-dk");
        var budget = decimal.Parse(budgetstring, provider);
        var spend = decimal.Parse(spendstring, provider);

        lock (dblock)
        {
            if (trip == null)
            {
                trip = new CommitteeTrip();
                db.CommitteeTrips.InsertOnSubmit(trip);
            }

            trip.ActualExpenses = spend;
            trip.Budget = budget;
            trip.CommitteeId = committee;
            trip.EndDate = enddate;
            trip.Place = place;
            trip.Purpose = purpose;
            trip.StartDate = startdate;
            trip.NonPolParticipants = othercount;
            trip.FTId = ftid;
            trip.Uri = url;

            // first submit so a new trip has its CommitteeTripId before participants are linked
            db.SubmitChanges();

            // Resolve the politicians exactly once. The lazy query would otherwise be re-run
            // (one lookup per link) for every existing participant checked below.
            var polidlist = polids.ToList();

            var newpols = polidlist.Where(_ => _.HasValue &&
                !trip.CommitteeTripParticipants.Any(p => p.Politician.PoliticianId == _.Value)
                );

            var tripparticipants = newpols.Select(_ => new CommitteeTripParticipant
            {
                CommitteeTripId = trip.CommitteeTripId,
                ParticipantId = _.Value
            });

            // participants stored earlier that are no longer listed on the page
            var partstodelete = trip.CommitteeTripParticipants.
                Where(_ => !polidlist.Contains(_.ParticipantId));

            db.CommitteeTripParticipants.DeleteAllOnSubmit(partstodelete);
            db.CommitteeTripParticipants.InsertAllOnSubmit(tripparticipants);

            db.SubmitChanges();
        }
    }
}
/// <summary>
/// Scrapes one question page: creates the P20Question (asker, askee, title, background, ask date,
/// committees) when it is not stored yet, and then tries to attach the answer document and date.
/// </summary>
/// <remarks>
/// Returns without doing anything when the question is already stored with an answer date, when
/// <paramref name="ftid"/> is 2370, when the page has no "af ..." paragraph, or when no minister
/// title can be found in that paragraph.
/// </remarks>
/// <param name="ftid">Number of the question; together with the session it identifies the question.</param>
/// <param name="title">Stored as the question text of a newly created question.</param>
/// <param name="commiteestrings">Names of the committees to link to a newly created question.</param>
/// <param name="answered">Whether the overview lists the question as answered.
/// NOTE(review): with the current condition this flag has no effect - confirm intent.</param>
/// <param name="url">Address of the question page, relative to Scrape2009.fastdomain.</param>
/// <param name="samling">Session the question belongs to.</param>
/// <param name="record">When false, this method never submits its changes to the database.
/// NOTE(review): the answer document download appears to store the document regardless - confirm.</param>
/// <exception cref="ArgumentException">The answer page's document link contains "founded".</exception>
public static void GetQ(int ftid, string title, IEnumerable<string> commiteestrings, bool answered, string url, Session samling, bool record = true)
{
    using (var db = new DBDataContext())
    {
        // check to see if we have this one and if it's answered
        var question = db.P20Questions.SingleOrDefault(
            _ => _.SessionId == samling.SessionId && _.FTId == ftid);

        if (question != null && question.AnswerDate.HasValue)
        {
            // we're done here
            return;
        }

        if (ftid == 2370)
        {
            // question is borked, ignore
            return;
        }

        // ok, follow the link
        HtmlDocument doc = Scrape2009.GetDoc(Scrape2009.fastdomain + url);

        if (question == null)
        {
            // create a new one
            // get asker, askee short title and background
            var shortitle = doc.DocumentNode.SelectSingleNode("//div[@id='menuSkip']/h1").
                InnerText.Split(new string[] { ftid.ToString() }, StringSplitOptions.None)[1].Trim().Trim('.');

            var pasker = doc.SelectHtmlNodes("//div[@id='menuSkip']/p").SingleOrDefault(
                _ => _.InnerText.Trim().ToLower().StartsWith("af "));

            if (pasker == null)
            {
                // we have to this due to this one with no asker: http://www.ft.dk/samling/20091/spoergsmaal/S445/index.htm
                return;
            }

            var politicianAnchors = pasker.SelectNodes("a").OfType<HtmlNode>();

            // the asker is the first link whose text carries a party in parentheses
            var askerPoliticianNameAndParty = politicianAnchors
                .Where(x => x.InnerText.Contains("("))
                .First().InnerText;

            var askerName = askerPoliticianNameAndParty.Split('(')[0].Trim();
            var askerParty = askerPoliticianNameAndParty.Split('(')[1].Replace(")", "").Trim();

            var asker = Scrape2009.GetPoliticianByNameAndParty(askerName, askerParty, db)
                .PoliticianId;

            // get the relevant minister
            var minregex = new Regex(@"Til[ \t]*(?'tit'[\w\s-]*)<br>");
            var match = minregex.Matches(pasker.InnerHtml);
            if (match.Count < 1)
            {
                // might be an incomplete question, just return
                return;
            }
            string ministertitle = match[0].Groups["tit"].Value.Trim();

            // the askee is the last link after the first one
            var askeeePoliticianNameAndParty = politicianAnchors
                .Skip(1).Last().InnerText;

            var askeeName = askeeePoliticianNameAndParty.Split('(')[0].Trim();
            int? askee = null;
            if (!askeeePoliticianNameAndParty.Contains("("))
            {
                // sometimes the party is not listed with name
                askee = Scrape2009.GetPoliticianByName(askeeName, db).PoliticianId;
            }
            else
            {
                var askeeParty = askeeePoliticianNameAndParty.Split('(')[1].Replace(")", "").Trim();
                askee = Scrape2009.GetPoliticianByNameAndParty(askeeName, askeeParty, db).PoliticianId;
            }

            var pbackground = doc.SelectHtmlNodes("//div[@id='menuSkip']/p").SingleOrDefault(
                _ => _.InnerText.Trim().ToLower().StartsWith("skriftlig begrundelse"));
            string backgroundtext = null;
            if (pbackground != null)
            {
                // Everything after the first <br>-separated segment (the heading) is the background.
                // The joined text must actually be kept, and a paragraph holding only the heading
                // must not blow up on an empty sequence.
                var backgroundparts = pbackground.InnerHtml.
                    Split(new string[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries).
                    Skip(1).ToList();
                if (backgroundparts.Count > 0)
                {
                    backgroundtext = backgroundparts.Aggregate((a, b) => a + " " + b.Trim());
                }
            }

            var datereg = new Regex(@"<br>(?'day'\d\d)-(?'mon'\d\d)-(?'yea'\d\d\d\d)<br>");
            var datematch = datereg.Matches(pasker.InnerHtml);
            var askdate = new DateTime(
                int.Parse(datematch[0].Groups["yea"].Value.Trim()),
                int.Parse(datematch[0].Groups["mon"].Value.Trim()),
                int.Parse(datematch[0].Groups["day"].Value.Trim())
                );

            question = new P20Question
            {
                AskeeId = askee,
                AskerPolId = asker,
                Title = shortitle,
                Background = backgroundtext,
                Type = QuestionType.Politician,
                FTId = ftid,
                Question = title,
                AskeeTitle = ministertitle,
                SessionId = samling.SessionId,
                AskDate = askdate,
            };

            db.P20Questions.InsertOnSubmit(question);
            if (record)
            {
                // submit first so the question has its id before committees are linked to it
                db.SubmitChanges();
            }

            var committees = db.Committees.Where(_ => commiteestrings.Contains(_.Name));
            db.ItemCommittees.InsertAllOnSubmit(committees.ToList().Select(_ =>
                new ItemCommittee
                {
                    CommitteeId = _.CommitteeId,
                    ItemId = question.P20QuestionId,
                    ItemType = 1
                }
                ));
            if (record)
            {
                db.SubmitChanges();
            }
        }

        // NOTE(review): at this point question is never null and never has an answer date, so
        // this condition always holds and 'answered' is effectively ignored - confirm intent.
        if (question != null && (!question.AnswerDate.HasValue || !answered))
        {
            // ok, try to get the answer, it should be there since the question looks answered

            // first, the date
            // have to do last due to this one
            // http://www.ft.dk/samling/20091/spoergsmaal/S2566/index.htm
            var dateps = doc.SelectHtmlNodes("//p[@style='padding-left:10px;']");
            if (dateps.Any())
            {
                var datep = dateps.Last();
                var receivedregex = new Regex(@"Modtaget: (?'day'\d\d)-(?'mon'\d\d)-(?'yea'\d\d\d\d)<br>");
                var rdatematch = receivedregex.Matches(datep.InnerHtml);
                var answer = new DateTime(
                    int.Parse(rdatematch[0].Groups["yea"].Value.Trim()),
                    int.Parse(rdatematch[0].Groups["mon"].Value.Trim()),
                    int.Parse(rdatematch[0].Groups["day"].Value.Trim())
                    );

                var tablewithanswerlink = doc.SelectHtmlNodes("//table[@class='lovTable']").Last();

                // recognizes the table row that holds the answer by its text
                Func<string, bool> answerrowfinder = _ => _.StartsWith("Svar:") ||
                    _.StartsWith("Svar :") ||
                    _.StartsWith("Svar (endeligt):") ||
                    _.StartsWith("Endeligt svar") ||
                    _.StartsWith("Svar på") ||
                    _.StartsWith("Supplerende svar på") ||
                    _.StartsWith("UDKASTspg") ||
                    _.ToLower().Contains("besvarelse") ||
                    _.ToLower().Contains("svar på") ||
                    _.StartsWith("S ");

                var rowwithcrapanswerlink = tablewithanswerlink.SelectHtmlNodes("tbody/tr").
                    SingleOrDefault(_ => answerrowfinder(_.InnerText));

                if (rowwithcrapanswerlink == null)
                {
                    // apparently not quite ready yet
                }
                else
                {
                    var craplinkurl = rowwithcrapanswerlink.SelectHtmlNodes("td/ul/li/a").
                        Single().Attributes["href"].Value;

                    var crapdoc = Scrape2009.GetDoc(craplinkurl);

                    // we do last due to this one
                    // http://www.ft.dk/samling/20091/spoergsmaal/s2695/svar/737831/index.htm#dok
                    // Non-breaking spaces are normalized to plain spaces so the prefixes above can
                    // match. NOTE(review): the previous call replaced a plain space with a plain
                    // space (a no-op); presumably a non-breaking space was meant - confirm.
                    var answerrow = crapdoc.SelectHtmlNodes("//table[@class='lovTable']/tbody/tr").
                        LastOrDefault(_ => answerrowfinder(_.InnerText.Replace("\u00A0", " ")));

                    if (answerrow == null)
                    {
                        // due to weirdness here: http://www.ft.dk/samling/20091/spoergsmaal/s536/svar/669456/index.htm#dok
                        answerrow = crapdoc.SelectHtmlNodes("//table[@class='lovTable']/tbody/tr").First();
                    }

                    // The value is not used, but Single() still throws unless the row has exactly
                    // one "Html-version" link, so the lookup is kept to preserve that check.
                    var answerlink = answerrow.SelectHtmlNodes("td/a").
                        Single(_ => _.InnerText.Trim().StartsWith("Html-version")).Attributes["href"].Value;

                    var docanswerlink = answerrow.SelectHtmlNodes("td/div/div/ul/li/a").
                        First().Attributes["href"].Value;

                    if (docanswerlink.Contains("founded"))
                    {
                        throw new ArgumentException("no such pdf for " + ftid);
                    }

                    var answerdocid = Util.DownloadDocument(docanswerlink, question);

                    if (answerdocid != null)
                    {
                        question.AnswerDocumentId = answerdocid;
                        question.AnswerDate = answer;
                    }
                    else
                    {
                        // apparently something went wrong when downloading doc, disregard
                    }
                }
            }
            else
            {
                // hmm, looks like it's not actually answered for reals
            }
        }

        if (record)
        {
            db.SubmitChanges();
        }
    }
}