//private List<String> SessionActivity(DateTime Start, DateTime End)
//{
//    WebSiteDownloaderOptions options =
//        new WebSiteDownloaderOptions();
//    options.DestinationFolderPath =
//        new DirectoryInfo(dataDir);
//    options.DestinationFileName = String.Format("Session-Activity[{0}][{1}].state",
//        Start.Date.ToShortDateString().Replace("/", "-"),
//        End.Date.ToShortDateString().Replace("/", "-"));
//    options.MaximumLinkDepth = 0;
//    options.TargetSession = 28;
//    options.DownloadUri =
//        new Uri(String.Format(@"http://www.legis.state.ak.us/basis/range_multi.asp?session={0}&Date1={1}&Date2={2}",
//            options.TargetSession,
//            Start.Date.ToShortDateString(),
//            End.Date.ToShortDateString()));
//    //Get all bill links.
//    options.GitCollectionRequest.Add(new DocumentHrefList()
//    {
//        pageName = "range_multi.asp",
//        pageType = UriType.Content,
//        pattern = new Regex(@"(?<=[=])[H|R|S][B|C|R|J]{0,3}[0-9]{1,4}", RegexOptions.IgnoreCase)
//    });
//    //Download que engine.
//    WebSiteDownloader downloader = new WebSiteDownloader(options);
//    downloader.ProcessingUrl +=
//        new WebSiteDownloader.ProcessingUrlEventHandler(
//            downloader_ProcessingUrl);
//    downloader.ProcessCompleted +=
//        new WebSiteDownloader.ProcessCompletedEventHandler(
//            downloader_ProcessCompleted);
//    downloader.ProcessAsync();
//    while (true)
//    {
//        Thread.Sleep(1000);
//        Console.WriteLine(@".");
//        lock (typeof(Spider))
//        {
//            if (finished)
//            {
//                break;
//            }
//        }
//    }
//    Console.WriteLine(@"finished processing.");
//    foreach (iCollector col in downloader.Parsings)
//        Console.WriteLine(String.Format("Rule found for {0}", col.pageName));
//    //Reset the exit.
//    finished = false;
//    //Grab saved targets.
//    return ((DocumentHrefList)downloader.Parsings[0]).matches;
//}

/// <summary>
/// Creates a <see cref="WebSiteDownloader"/> for the given options, starts it with
/// <c>ProcessAsync</c>, and blocks the calling thread until the shared
/// <c>finished</c> flag is observed set.
/// </summary>
/// <param name="options">Options handed to the <see cref="WebSiteDownloader"/> constructor.</param>
/// <returns>
/// The downloader instance, returned once the wait loop has ended so the caller
/// can inspect its <c>Parsings</c>.
/// </returns>
/// <remarks>
/// While waiting, a "." is written to the console after each one-second sleep.
/// Afterwards one line is written per collector in <c>Parsings</c>, and the
/// <c>finished</c> flag is cleared before returning.
/// NOTE(review): <c>finished</c> is presumably set by <c>downloader_ProcessCompleted</c>
/// (not visible in this section) under the same <c>typeof(Spider)</c> lock - confirm.
/// </remarks>
public static WebSiteDownloader DownloadingProcessor(WebSiteDownloaderOptions options)
{
    // Download queue engine.
    WebSiteDownloader downloader = new WebSiteDownloader(options);
    downloader.ProcessingUrl +=
        new WebSiteDownloader.ProcessingUrlEventHandler(
            downloader_ProcessingUrl);
    downloader.ProcessCompleted +=
        new WebSiteDownloader.ProcessCompletedEventHandler(
            downloader_ProcessCompleted);
    downloader.ProcessAsync();

    // Poll the completion flag once per second. The lock target is left as
    // typeof(Spider) because whatever sets the flag must share the same lock
    // object, and that code is outside this section.
    while (true)
    {
        Thread.Sleep(1000);
        Console.WriteLine(@".");
        lock (typeof(Spider))
        {
            if (finished)
            {
                break;
            }
        }
    }

    Console.WriteLine(@"finished processing.");
    foreach (iCollector col in downloader.Parsings)
    {
        Console.WriteLine("Rule found for {0}", col.pageName);
    }

    // Reset the exit flag for the next run. This is done under the same lock
    // used to read it, so every access to the flag in this method is synchronized.
    lock (typeof(Spider))
    {
        finished = false;
    }

    return downloader;
}
/// <summary>
/// Event handler that writes the URL currently being processed to the console.
/// </summary>
/// <param name="sender">Event source; not used by this handler.</param>
/// <param name="e">Event data; its <c>UriInfo.AbsoluteUri</c> value is written out.</param>
private static void downloader_ProcessingUrl(
    object sender,
    WebSiteDownloader.ProcessingUrlEventArgs e)
{
    var currentUri = e.UriInfo.AbsoluteUri;
    string message = string.Format(@"Processing URL '{0}'.", currentUri);

    Console.WriteLine(message);
}
/// <summary>
/// Re-inflates the search results.
/// </summary>
/// <remarks>
/// Builds downloader options that point at <c>dataDir</c> and at a
/// "Session-Activity[start][end].state" file name for 15-18 February 2013,
/// constructs a <see cref="WebSiteDownloader"/> from them (without starting a
/// download), and asserts that its <c>Parsings</c> collection is not empty.
/// NOTE(review): presumably the constructor reloads previously saved state from
/// that file - confirm against <c>WebSiteDownloader</c>.
/// </remarks>
//[TestMethod]
public void Run_Spider_On_Activity_Search_Results()
{
    DateTime rangeStart = new DateTime(2013, 2, 15);
    DateTime rangeEnd = new DateTime(2013, 2, 18);

    // Short date strings with "/" swapped for "-" for use in the file name.
    string startStamp = rangeStart.Date.ToShortDateString().Replace("/", "-");
    string endStamp = rangeEnd.Date.ToShortDateString().Replace("/", "-");

    WebSiteDownloaderOptions options = new WebSiteDownloaderOptions
    {
        DestinationFolderPath = new DirectoryInfo(dataDir),
        DestinationFileName = String.Format("Session-Activity[{0}][{1}].state", startStamp, endStamp)
    };

    // Download queue engine.
    WebSiteDownloader downloader = new WebSiteDownloader(options);

    // Snapshot of every collector the downloader exposes.
    List<iCollector> collectors = new List<iCollector>(downloader.Parsings);

    // Regex for matching bill copy pages.
    Regex billTextPage = new Regex("get[_]bill[_]text[.]asp");

    // Collectors whose source URI matches the bill-text page.
    // NOTE(review): this filtered list is built but never asserted on; the
    // assertion below checks the unfiltered collectors - confirm that is intended.
    List<iCollector> billTextCollectors = collectors
        .Where(el => billTextPage.Matches(el.source.AbsoluteUri.AbsoluteUri).Count != 0)
        .ToList();

    Assert.IsTrue(collectors.Count > 0);
}