/// <summary>
/// Starts an asynchronous crawl of <paramref name="WebUri"/> into
/// <paramref name="DirPath"/>, then blocks, printing a dot every 100 ms,
/// until either the completion handler raises <c>finished</c> or 200 polls
/// (~20 seconds) elapse and the flag is forced.
/// </summary>
/// <param name="WebUri">Root URI to spider.</param>
/// <param name="DirPath">Directory that receives the downloaded files.</param>
public static void FirstAnalyzed(String WebUri, String DirPath)
{
    urlfinishedcount = 0;
    finished = false;

    WebSiteDownloaderOptions options = new WebSiteDownloaderOptions();
    //options.DownloadUri =
    //    new Uri(@"http://sudarshannews.com/");
    //options.DestinationFolderPath =
    //    new DirectoryInfo(@"C:\Users\SATWADHIR PAWAR\Desktop\scrap");
    options.DownloadUri = new Uri(WebUri);
    options.DestinationFolderPath = new DirectoryInfo(DirPath);

    WebSiteDownloader downloader = new WebSiteDownloader(options);
    // NOTE(review): presumably downloader_ProcessCompleted is what sets
    // 'finished' — confirm; this method only reads the flag, never the handler.
    downloader.ProcessingUrl +=
        new WebSiteDownloader.ProcessingUrlEventHandler(downloader_ProcessingUrl);
    downloader.ProcessCompleted +=
        new WebSiteDownloader.ProcessCompletedEventHandler(downloader_ProcessCompleted);
    downloader.ProcessAsync();

    while (true)
    {
        Thread.Sleep(100);
        Console.WriteLine(@".");

        // Hard timeout: after 200 polls (~20 s) force 'finished' so the loop
        // cannot hang forever if the crawl never completes. NOTE(review): this
        // write happens OUTSIDE the lock that guards the read below — confirm
        // whether that is intentional.
        urlfinishedcount += 1;
        if (urlfinishedcount == 200)
        {
            urlfinishedcount = 0;
            finished = true;
        }

        // NOTE(review): locking on typeof(ScrapperDemoStart) is an anti-pattern
        // (any code in the process can contend on the same lock); kept as-is
        // because the completion handler is assumed to synchronize on the same
        // object — verify before switching to a private gate object.
        lock (typeof(ScrapperDemoStart))
        {
            if (finished)
            {
                break;
            }
        }
    }
    Console.WriteLine(@"finished.");
}//End FirstAnalyze
/// <summary>
/// Runs the spider synchronously for every configured URL, redirecting each
/// crawl's output into this task's <c>DestinationFolderPath</c>.
/// </summary>
protected override void ExecuteTask()
{
    foreach (SpiderUrl spiderUrl in SpiderUrls)
    {
        // Per-URL options come from the configuration entry; only the
        // destination folder is overridden by the task.
        WebSiteDownloaderOptions options = spiderUrl.Options;
        options.DestinationFolderPath = new DirectoryInfo(this.DestinationFolderPath);

        WebSiteDownloader downloader = new WebSiteDownloader(options);
        downloader.Process();
    }
}
/// <summary>
/// Starts an asynchronous crawl of <paramref name="url"/> into
/// <paramref name="folderpath"/> and blocks, printing a dot per second,
/// until the completion handler raises the static <c>finished</c> flag.
/// </summary>
/// <param name="url">Root URI to spider.</param>
/// <param name="folderpath">Directory that receives the downloaded files.</param>
/// <returns>The value of the <c>finished</c> flag when the wait loop exits.</returns>
public static bool RunTheFirstAnalyze(string url, string folderpath)
{
    WebSiteDownloaderOptions options = new WebSiteDownloaderOptions
    {
        DownloadUri = new Uri(url),
        DestinationFolderPath = new DirectoryInfo(folderpath)
    };

    WebSiteDownloader downloader = new WebSiteDownloader(options);
    downloader.ProcessingUrl +=
        new WebSiteDownloader.ProcessingUrlEventHandler(downloader_ProcessingUrl);
    downloader.ProcessCompleted +=
        new WebSiteDownloader.ProcessCompletedEventHandler(downloader_ProcessCompleted);
    downloader.ProcessAsync();

    // Poll once a second; the completion handler flips 'finished' under the
    // same type-object lock.
    bool done = false;
    while (!done)
    {
        Thread.Sleep(1000);
        Console.WriteLine(@".");
        lock (typeof(Program))
        {
            done = finished;
        }
    }

    Console.WriteLine(@"finished.");
    return finished;
}
/// <summary>
/// Console entry point: crawls http://www.cadfolks.com into F:\demo and
/// blocks, printing a dot per second, until the completion handler raises
/// the static <c>finished</c> flag.
/// </summary>
private static void Main( string[] args )
{
    WebSiteDownloaderOptions options = new WebSiteDownloaderOptions
    {
        DownloadUri = new Uri(@"http://www.cadfolks.com"),
        DestinationFolderPath = new DirectoryInfo(@"F:\demo")
    };

    WebSiteDownloader downloader = new WebSiteDownloader(options);
    downloader.ProcessingUrl +=
        new WebSiteDownloader.ProcessingUrlEventHandler(downloader_ProcessingUrl);
    downloader.ProcessCompleted +=
        new WebSiteDownloader.ProcessCompletedEventHandler(downloader_ProcessCompleted);
    downloader.ProcessAsync();

    // Wait for the asynchronous crawl; 'finished' is read under the same
    // type-object lock the completion handler uses.
    for (;;)
    {
        Thread.Sleep(1000);
        Console.WriteLine(@".");

        bool done;
        lock (typeof(Program))
        {
            done = finished;
        }
        if (done)
        {
            break;
        }
    }

    Console.WriteLine(@"finished.");
}
}//End FirstAnalyze

/// <summary>
/// Progress callback: writes each URL to the console as the downloader
/// begins processing it.
/// </summary>
private static void downloader_ProcessingUrl(
    object sender,
    WebSiteDownloader.ProcessingUrlEventArgs e)
{
    string message = string.Format(@"Processing URL '{0}'.", e.UriInfo.AbsoluteUri);
    Console.WriteLine(message);
}
/// <summary>
/// Re-inflates the search results.
/// </summary>
//[TestMethod]
public void Run_Spider_On_Activity_Search_Results()
{
    // Fixed date window the cached session-state file was captured for.
    DateTime Start = new DateTime(2013, 2, 15);
    DateTime End = new DateTime(2013, 2, 18);

    WebSiteDownloaderOptions options = new WebSiteDownloaderOptions();
    options.DestinationFolderPath = new DirectoryInfo(dataDir);
    // Produces e.g. "Session-Activity[2-15-2013][2-18-2013].state"; '/' is
    // replaced with '-' so the short date can appear in a file name.
    // NOTE(review): ToShortDateString is culture-sensitive, so the generated
    // file name differs per machine locale — confirm this matches how the
    // .state file was originally written.
    options.DestinationFileName =
        String.Format("Session-Activity[{0}][{1}].state",
            Start.Date.ToShortDateString().Replace("/", "-"),
            End.Date.ToShortDateString().Replace("/", "-"));

    //Download queue engine.
    WebSiteDownloader downloader = new WebSiteDownloader(options);
    List<iCollector> coll = (from p in downloader.Parsings select p).ToList();

    //RegEx for matching bill copy.
    Regex r = new Regex("get[_]bill[_]text[.]asp");

    //Get all matches.
    // NOTE(review): 'refined' is computed but never used — the assertion below
    // checks 'coll' only; confirm whether it was meant to target 'refined'.
    // Also 'el.source.AbsoluteUri.AbsoluteUri' chains AbsoluteUri twice —
    // verify against the iCollector declaration.
    List<iCollector> refined = (from el in coll
                                let matches = r.Matches(el.source.AbsoluteUri.AbsoluteUri)
                                where matches.Count != 0
                                select el).ToList();

    Assert.IsTrue(coll.Count() > 0);
}