// Manual harness: asks GitRepository.ViewHistory for the history of two files that live in
// the same bill folder. The test attribute below is disabled, so a test runner will not
// pick this method up.
//[TestMethod]
public void Get_Commit_History_For_Single_File()
{
    // Both lookups target the same folder, so the literal is spelled out once.
    const string billFolder = @"28\HB129\";
    var repository = new GitRepository();

    List<IVersionHistory> contentHistory = repository.ViewHistory(billFolder, @"content.txt");
    List<IVersionHistory> billXmlHistory = repository.ViewHistory(billFolder, @"bill.xml");

    // Inert statement; presumably kept as a breakpoint anchor for inspecting the two lists
    // above in a debugger — confirm before removing.
    var breakpointAnchor = @"Stop";
}
/// <summary>
/// Runs the spider against the legis.state.ak.us BASIS activity listing for one session and
/// date range, groups the crawl output per bill, creates a data folder for each bill, and
/// hands each bill's collected data to <c>GitRepository.ProcessBill</c>.
/// </summary>
/// <remarks>
/// The method creates directories under <c>dataDir</c> and presumably performs live network
/// requests through <c>Spider.DownloadingProcessor</c>. It carries no real assertion: it
/// passes whenever the whole pipeline completes without throwing.
/// </remarks>
public void Run_Spider_Inflate_And_Save_Results_To_GitHub()
{
    // Single source of truth for the session number; it is used for the crawl options,
    // the fulltext URL fragment, the on-disk folders and the repository path.
    const int sessionNumber = 28;

    // Dates embedded in the URL and in the state-file name must not depend on the culture
    // of the machine running the test, so they are formatted explicitly. "M/d/yyyy" under
    // the invariant culture yields the same text as the en-US short date (e.g. 2/19/2014).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    DateTime start = new DateTime(2014, 2, 19);
    DateTime end = new DateTime(2015, 2, 21);
    string startStamp = start.ToString("M/d/yyyy", invariant);
    string endStamp = end.ToString("M/d/yyyy", invariant);

    WebSiteDownloaderOptions options = new WebSiteDownloaderOptions();
    options.DestinationFolderPath = new DirectoryInfo(dataDir);
    options.DestinationFileName = String.Format(
        invariant,
        "Session-Activity[{0}][{1}].state",
        startStamp.Replace("/", "-"),
        endStamp.Replace("/", "-"));
    options.MaximumLinkDepth = 3;
    options.TargetSession = sessionNumber;
    options.DownloadUri = new Uri(String.Format(
        invariant,
        @"http://www.legis.state.ak.us/basis/range_multi.asp?session={0}&Date1={1}&Date2={2}",
        options.TargetSession,
        startStamp,
        endStamp));

    WebSiteDownloader rslt = Spider.DownloadingProcessor(options);

    /* 1. Select bill names.
     *    We need to know the bill name (HB16) so we can save data in a folder of the
     *    same name. */

    // Static list of bill pages.
    // NOTE(review): Index == 1 presumably selects resources one link away from the start
    // page — confirm against the spider's definition of Index.
    var billPages = (from r in rslt.Resources
                     where r.Index == 1
                        && r.AbsoluteUri.AbsoluteUri.Contains(@"get_bill.asp")
                     select r).ToList();

    // Match bill titles in the URI (HB16, SB12, ...) directly after an '='.
    // The character classes deliberately contain no '|': inside [...] a pipe is a literal
    // character, not an alternation, so keeping it would accept '|' as part of a name.
    Regex billTitles = new Regex(@"(?<=[=])[HRS][BCRJ]{0,3}[0-9]{1,4}", RegexOptions.IgnoreCase);

    // Keep the first match per bill page; pages without a recognisable name are dropped.
    var bills = (from page in billPages
                 let match = billTitles.Match(page.AbsoluteUri.AbsoluteUri)
                 where match.Success
                 select new { url = page.AbsoluteUri, name = match.Value }).ToList();

    /* 2. Build out the directory structure for bill data: <dataDir>/<session>/<bill>. */

    // Directory.CreateDirectory is a no-op for directories that already exist. The session
    // folder is created explicitly so it exists even when no bills were found.
    string sessionDir = Path.Combine(dataDir, sessionNumber.ToString(invariant));
    Directory.CreateDirectory(sessionDir);

    foreach (var bill in bills)
    {
        Directory.CreateDirectory(Path.Combine(sessionDir, bill.name));
    }

    /* 3. Associated bill data.
     *    Name, Title, LongTitle, Minutes Content, Bill Revisions, Bill Activity. */
    foreach (var bill in bills)
    {
        string billUri = bill.url.AbsoluteUri;
        string fullTextFragment = String.Format(
            invariant,
            @"get_fulltext.asp?session={0}&bill={1}",
            sessionNumber,
            bill.name);

        // Document history, activity and kvp: parsings whose source is the bill page
        // itself or whose source's parent is the bill page.
        var meta = new List<iCollector>(
            from h in rslt.Parsings
            where h.source.AbsoluteUri.AbsoluteUri == billUri
               || h.source.Parent.AbsoluteUri == billUri
            select h);

        // Bill content: parsings reached from this bill's fulltext page.
        // NOTE(review): Contains() is a substring test, so "bill=HB1" also matches
        // "bill=HB12"; the same applies to the bill-name filter used for committee and
        // minutes below. Behaviour is kept as-is — confirm whether an exact match is wanted.
        var revisions = new List<iCollector>(
            from d in rslt.Parsings
            where d.source.Parent.AbsoluteUri.Contains(fullTextFragment)
            select d);

        // Committee meetings.
        List<iCollector> committee = CollectParsingsByPage(rslt, "get_minutes.asp", bill.name);

        // Meeting transcripts (minutes).
        List<iCollector> minutes = CollectParsingsByPage(rslt, @"get_single_minute.asp", bill.name);

        /* 4. Hand the collected parts to the repository.
         *    The location is relative and uses backslashes: "<session>\<bill>\". */
        String fileLoc = String.Format(invariant, @"{0}\{1}\", sessionNumber, bill.name);

        // A repository instance is created per bill, as before; whether one instance could
        // be shared depends on GitRepository, which is not visible here.
        GitRepository gr = new GitRepository();

        // Process bill parts.
        gr.ProcessBill(fileLoc, new ParsedBill()
        {
            meta = meta,
            minutes = minutes,
            revisions = revisions,
            committee = committee
        });
    }

    // Placeholder assertion: reaching this line without an exception is the only check.
    Assert.IsTrue(true);
}

// Returns the parsings whose source URI contains pageName and whose source's parent is a
// crawled resource with billName in its URI.
private static List<iCollector> CollectParsingsByPage(WebSiteDownloader crawl, string pageName, string billName)
{
    return new List<iCollector>(
        from d in crawl.Resources
        join p in crawl.Parsings
            on d.AbsoluteUri.AbsoluteUri equals p.source.Parent.AbsoluteUri
        where p.source.AbsoluteUri.AbsoluteUri.Contains(pageName)
           && d.AbsoluteUri.AbsoluteUri.Contains(billName)
        select p);
}