Ejemplo n.º 1
0
        /// <summary>
        /// Manual smoke check: requests the version history of two files stored
        /// under the <c>28\HB129\</c> folder from a fresh <c>GitRepository</c>.
        /// The method makes no assertions; the results are only held in locals so
        /// they can be inspected in a debugger. The test attribute below is
        /// commented out, so a test runner will not pick this method up.
        /// </summary>
        //[TestMethod]
        public void Get_Commit_History_For_Single_File()
        {
            // Both lookups target the same bill folder.
            const string billFolder = @"28\HB129\";

            GitRepository repository = new GitRepository();

            List<IVersionHistory> contentHistory = repository.ViewHistory(billFolder, @"content.txt");
            List<IVersionHistory> billXmlHistory = repository.ViewHistory(billFolder, @"bill.xml");

            // Unused value: gives the debugger a line to break on after both calls.
            var breakpointAnchor = @"Stop";
        }
Ejemplo n.º 2
0
        /// <summary>
        /// End-to-end run: downloads the legislative activity listing for a fixed
        /// date range via <c>Spider.DownloadingProcessor</c>, derives the bill
        /// names from the downloaded <c>get_bill.asp</c> resources, creates one
        /// directory per bill under <c>dataDir</c>, groups the parsed results that
        /// belong to each bill and hands them to <c>GitRepository.ProcessBill</c>.
        /// </summary>
        /// <remarks>
        /// The method talks to a live web site and writes to disk. Its only
        /// assertion is <c>Assert.IsTrue(true)</c>, so it can fail only by
        /// throwing an exception.
        /// </remarks>
        public void Run_Spider_Inflate_And_Save_Results_To_GitHub()
        {
            // Single source of truth for the session number; it is used in the
            // request URL, the directory layout and the repository-relative path.
            const int sessionNumber = 28;

            DateTime start = new DateTime(2014, 2, 19);
            DateTime end = new DateTime(2015, 2, 21);

            // Format the dates with an explicit pattern and the invariant culture.
            // ToShortDateString() follows the current culture, so on a non-US
            // machine the query string would change shape and the "/" -> "-"
            // replacement used for the file name would no longer apply.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string startText = start.ToString("M/d/yyyy", invariant);
            string endText = end.ToString("M/d/yyyy", invariant);

            WebSiteDownloaderOptions options = new WebSiteDownloaderOptions();
            options.DestinationFolderPath = new DirectoryInfo(dataDir);
            options.DestinationFileName = String.Format("Session-Activity[{0}][{1}].state",
                                            startText.Replace("/", "-"),
                                            endText.Replace("/", "-"));

            options.MaximumLinkDepth = 3;
            options.TargetSession = sessionNumber;
            options.DownloadUri =
                new Uri(String.Format(@"http://www.legis.state.ak.us/basis/range_multi.asp?session={0}&Date1={1}&Date2={2}",
                            options.TargetSession,
                            startText,
                            endText));

            WebSiteDownloader rslt = Spider.DownloadingProcessor(options);

            /*
            1. Select Bill Names
            * We need to know the bill name (HB16), so we can save data in a folder of the same name.
            */

            //Static list of bills: resources with Index == 1 that point at get_bill.asp
            var masterlist = (from r in rslt.Resources
                              where r.Index == 1 && r.AbsoluteUri.AbsoluteUri.Contains(@"get_bill.asp")
                              select r).ToList();

            //Match bill titles in the URI (HB16,SB12..).
            //Inside a character class '|' is a literal pipe, not alternation, so
            //the classes list only the letters that are meant to match.
            Regex billTitles = new Regex(@"(?<=[=])[HRS][BCRJ]{0,3}[0-9]{1,4}", RegexOptions.IgnoreCase);

            //Keep the first match of every URI that contains a bill title
            var bills = (from b in masterlist
                         let match = billTitles.Match(b.AbsoluteUri.AbsoluteUri)
                         where match.Success
                         select new
                         {
                             resource = b,
                             url = b.AbsoluteUri,
                             name = match.Value
                         }).ToList();

            /*
            2. Build out directory structure for bill data.
            * We have a list of bills, now where are we going to save the data?
            */

            DirectoryInfo session = new DirectoryInfo(String.Format(@"{0}/{1}", dataDir, sessionNumber));
            if (!session.Exists)
            {
                session.Create();
            }

            foreach (var item in bills)
            {
                //bill directory
                DirectoryInfo billDirectory = new DirectoryInfo(String.Format(@"{0}/{1}/{2}", dataDir, sessionNumber, item.name));
                if (!billDirectory.Exists)
                {
                    billDirectory.Create();
                }
            }

            /*
            3. Associated bill data
            *  Grab associated bill data. Name, Title, LongTitle,
            *  Minutes Content, Bill Revisions, Bill Activity
            */

            foreach (var bill in bills)
            {
                //Loop-invariant values, computed once per bill instead of once per
                //element inside the queries below.
                string billUri = bill.url.AbsoluteUri;
                string fullTextQuery = String.Format(@"get_fulltext.asp?session={0}&bill={1}", sessionNumber, bill.name);

                // NOTE(review): the Contains() checks below are substring matches, so a
                // bill named "HB1" also picks up resources for "HB12", "HB129", ...
                // Tightening this needs knowledge of the exact URL shapes - confirm.
                // NOTE(review): source.Parent is dereferenced without a null check;
                // presumably every parsed resource has a parent - verify.

                //Document history, activity and kvp..
                List<iCollector> meta = (from h in rslt.Parsings
                                         where h.source.AbsoluteUri.AbsoluteUri == billUri
                                            || h.source.Parent.AbsoluteUri == billUri
                                         select h).ToList();

                //Bill Content
                List<iCollector> revisions = (from d in rslt.Parsings
                                              where d.source.Parent.AbsoluteUri.Contains(fullTextQuery)
                                              select d).ToList();

                //Committee Meetings
                List<iCollector> committee = (from d in rslt.Resources
                                              join p in rslt.Parsings
                                              on d.AbsoluteUri.AbsoluteUri equals
                                                 p.source.Parent.AbsoluteUri
                                              where p.source.AbsoluteUri.AbsoluteUri.Contains("get_minutes.asp")
                                                 && d.AbsoluteUri.AbsoluteUri.Contains(bill.name)
                                              select p).ToList();

                //Meeting Transcript (minutes)
                List<iCollector> minutes = (from d in rslt.Resources
                                            join p in rslt.Parsings
                                            on d.AbsoluteUri.AbsoluteUri equals
                                               p.source.Parent.AbsoluteUri
                                            where p.source.AbsoluteUri.AbsoluteUri.Contains(@"get_single_minute.asp")
                                               && d.AbsoluteUri.AbsoluteUri.Contains(bill.name)
                                            select p).ToList();

                /*
                4. Start saving off the data
                * Hand the grouped results to the repository under "<session>\<bill>\".
                */

                String fileLoc = String.Format(@"{0}\{1}\", sessionNumber, bill.name);

                GitRepository gr = new GitRepository();
                //Process bill parts
                gr.ProcessBill(fileLoc, new ParsedBill()
                {
                    meta = meta,
                    minutes = minutes,
                    revisions = revisions,
                    committee = committee
                });
            }

            // Always passes: reaching this line without an exception is the only
            // success criterion of this run.
            Assert.IsTrue(true);
        }