/// <summary>
/// Entry point: initializes log4net, loads the site configuration JSON and runs
/// one crawler per configured site.
/// </summary>
public static void Main(string[] args)
{
    // Path.Combine avoids a broken path if BaseDirectory lacks a trailing separator.
    string log4netPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "log4net.xml");
    LogHelper.Initialize(log4netPath);

    string configPath = ConfigurationManager.AppSettings["ConfigurationFile"];
    if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath))
    {
        Console.WriteLine("Configuration file missing. \nPress any key to exit...");
        Console.ReadKey();
        return;
    }

    string config = File.ReadAllText(configPath);
    List<SiteParameter> siteParameters = JsonConvert.DeserializeObject<List<SiteParameter>>(config);
    // DeserializeObject returns null for a "null"/empty payload; guard instead of
    // throwing a NullReferenceException in the loop below.
    if (siteParameters == null)
    {
        Console.WriteLine("Configuration file contains no site parameters. \nPress any key to exit...");
        Console.ReadKey();
        return;
    }

    foreach (var parameter in siteParameters)
    {
        LogHelper.WriteInfo($"Starting crawler for {parameter.MerchantNamePattern}");
        var crawler = CrawlerFactory.Create(parameter);
        crawler.Crawl();
        LogHelper.WriteInfo($"Crawling {parameter.MerchantNamePattern} done.");
    }

#if DEBUG
    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
#endif
}
// Parse DOM elements: resolve the site-specific crawler for this URL and let it
// process the currently loaded browser document. No-op when no crawler matches.
void crawl(string url)
{
    Crawler crawler = CrawlerFactory.getCrawler(url);
    if (crawler == null)
    {
        return;
    }

    crawler.crawl(browser.Document);
}
/// <summary>
/// Entry point: wires up the database connection, configures logging from app
/// settings, loads the site configuration JSON and runs one crawler per site.
/// </summary>
public static void Main(string[] args)
{
    string connectionString = ConfigurationManager.ConnectionStrings["MySqlConStr"].ConnectionString;
    CrawlerDbHelper.Init(connectionString);
    using (var context = CrawlerDbHelper.GetContext())
    {
        if (!context.Database.Exists())
        {
            // TODO: Create database here
        }
    }

    // Optional log level override; on parse failure TryParse leaves the
    // Information default in place, so the ignored return value is intentional.
    string logLevel = ConfigurationManager.AppSettings["MultiLogger.LogLevel"];
    if (!string.IsNullOrWhiteSpace(logLevel))
    {
        LogType level = LogType.Information;
        Enum.TryParse<LogType>(logLevel, out level);
        Logging.LogLevel = level;
    }

    // Optional file logger target.
    string fileLoggerPath = ConfigurationManager.AppSettings["FileLogger.Path"];
    if (!string.IsNullOrWhiteSpace(fileLoggerPath))
    {
        FileLogger fileLogger = new FileLogger(fileLoggerPath);
        Logging.Loggers.Add(fileLogger);
    }

    string configPath = ConfigurationManager.AppSettings["ConfigurationFile"];
    if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath))
    {
        Console.WriteLine("Configuration file missing. \nPress any key to exit...");
        Console.ReadKey();
        return;
    }

    string config = File.ReadAllText(configPath);
    List<SiteParameter> siteParameters = JsonConvert.DeserializeObject<List<SiteParameter>>(config);
    // DeserializeObject returns null for a "null"/empty payload; guard instead of
    // throwing a NullReferenceException in the loop below.
    if (siteParameters == null)
    {
        Console.WriteLine("Configuration file contains no site parameters. \nPress any key to exit...");
        Console.ReadKey();
        return;
    }

    foreach (var parameter in siteParameters)
    {
        Logging.WriteEntry("Main", LogType.Information, $"Starting crawler for {parameter.SiteName}");
        var crawler = CrawlerFactory.Create(parameter);
        crawler.Crawl(parameter);
        Logging.WriteEntry("Main", LogType.Information, $"Crawling {parameter.SiteName} done.");
    }

#if DEBUG
    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
#endif
}
/// <summary>
/// The crawler selected by "--tipo 0" should return exactly one image for the
/// given search term.
/// </summary>
// NOTE(review): "FAZER_ESSE_METODO" ("make this method") is a placeholder name;
// rename once test runners/callers have been checked.
public void FAZER_ESSE_METODO()
{
    var args = new string[] { "--termo", "sem você a vida é tão", "--tipo", "0" };
    var factory = new CrawlerFactory(args);

    var images = factory.GetCrawler().GetImagesAsync().GetAwaiter().GetResult();

    // Assert.AreEqual takes (expected, actual); the original had them reversed,
    // which produces misleading failure messages.
    Assert.AreEqual(1, images.Count);
}
/// <summary>
/// The Dropbox crawler ("--tipo 1") should return exactly one image (partitura)
/// for the given search term.
/// </summary>
public void Dropbox_Uma_Partitura()
{
    var args = new List<string>() { "--termo", "sem você a vida é tão", "--tipo", "1" };
    var factory = new CrawlerFactory(args.ToArray());

    var images = factory.GetCrawler().GetImagesAsync().GetAwaiter().GetResult();

    // Assert.AreEqual takes (expected, actual); the original had them reversed,
    // which produces misleading failure messages.
    Assert.AreEqual(1, images.Count);
}
/// <summary>
/// When the corresponding startup setting is enabled, checks every tracked
/// blog's online state one after another on a background thread.
/// </summary>
private async Task CheckBlogsOnlineStatus()
{
    if (!shellService.Settings.CheckOnlineStatusAtStartup)
    {
        return;
    }

    await Task.Run(async () =>
    {
        foreach (IBlog blog in managerService.BlogFiles)
        {
            ICrawler crawler = CrawlerFactory.GetCrawler(
                blog, new CancellationToken(), new PauseToken(),
                new Progress<DownloadProgress>(), shellService, crawlerService, managerService);
            await crawler.IsBlogOnlineAsync();
        }
    });
}
// Adds a blog by URL: resolves the concrete blog type, probes whether it is
// online, promotes private Tumblr blogs to the private-blog variant (with a
// matching crawler), fetches its metadata, and finally registers it unless a
// blog with the same name and type already exists.
private async Task AddBlogAsync(string blogUrl)
{
    if (string.IsNullOrEmpty(blogUrl))
    {
        // Fall back to the URL currently held by the crawler service.
        blogUrl = crawlerService.NewBlogUrl;
    }
    // TODO: Dependency, not SOLID!
    IBlog blog;
    try
    {
        blog = BlogFactory.GetBlog(blogUrl, Path.Combine(shellService.Settings.DownloadLocation, "Index"));
    }
    catch (ArgumentException)
    {
        // Unsupported or invalid URL — silently ignore, matching existing behavior.
        return;
    }
    blog = settingsService.TransferGlobalSettingsToBlog(blog);
    ICrawler crawler = CrawlerFactory.GetCrawler(blog.BlogType, new CancellationToken(), new PauseToken(), new Progress <DownloadProgress>(), shellService, crawlerService, blog);
    await crawler.IsBlogOnlineAsync();
    if (CheckIfTumblrPrivateBlog(blog))
    {
        // Private Tumblr blogs need a different blog subtype — and therefore a
        // freshly created crawler bound to the promoted blog instance.
        blog = PromoteTumblrBlogToPrivateBlog(blog);
        crawler = CrawlerFactory.GetCrawler(blog.BlogType, new CancellationToken(), new PauseToken(), new Progress <DownloadProgress>(), shellService, crawlerService, blog);
    }
    await crawler.UpdateMetaInformationAsync();
    lock (lockObject)
    {
        // Duplicate check and save happen under the same lock so two concurrent
        // adds of the same blog cannot both pass the existence check.
        if (managerService.BlogFiles.Any(blogs => blogs.Name.Equals(blog.Name) && blogs.BlogType.Equals(blog.BlogType)))
        {
            shellService.ShowError(null, Resources.BlogAlreadyExist, blog.Name);
            return;
        }
        if (blog.Save())
        {
            // Marshal the observable-collection mutation onto the UI thread.
            QueueOnDispatcher.CheckBeginInvokeOnUI((Action)(() => managerService.BlogFiles.Add(blog)));
        }
    }
}
/// <summary>
/// When the corresponding startup setting is enabled, checks all tracked blogs'
/// online state in parallel, capped at 25 concurrent checks.
/// </summary>
private async Task CheckBlogsOnlineStatusAsync()
{
    if (!shellService.Settings.CheckOnlineStatusOnStartup)
    {
        return;
    }

    await Task.Run(async () =>
    {
        using (var semaphoreSlim = new SemaphoreSlim(25))
        {
            IEnumerable<IBlog> blogs = managerService.BlogFiles;
            IEnumerable<Task> tasks = blogs.Select(async blog =>
            {
                await semaphoreSlim.WaitAsync();
                try
                {
                    ICrawler crawler = CrawlerFactory.GetCrawler(blog, new CancellationToken(), new PauseToken(), new Progress<DownloadProgress>(), shellService, crawlerService, managerService);
                    await crawler.IsBlogOnlineAsync();
                }
                finally
                {
                    // Release must run even if IsBlogOnlineAsync throws; otherwise a
                    // faulted check permanently consumes a slot and the remaining
                    // queued checks never run, leaving Task.WhenAll hanging.
                    semaphoreSlim.Release();
                }
            });
            await Task.WhenAll(tasks);
        }
    });
}
// Adds a blog by URL: resolves the concrete blog type, promotes hidden Tumblr
// blogs to the hidden-blog variant, registers the blog (unless a blog with the
// same name and type already exists), applies global settings, and finally
// fetches its metadata.
private async Task AddBlogAsync(string blogUrl)
{
    if (string.IsNullOrEmpty(blogUrl))
    {
        // Fall back to the URL currently held by the crawler service.
        blogUrl = crawlerService.NewBlogUrl;
    }
    IBlog blog;
    try
    {
        blog = BlogFactory.GetBlog(blogUrl, Path.Combine(shellService.Settings.DownloadLocation, "Index"));
    }
    catch (ArgumentException)
    {
        // Unsupported or invalid URL — silently ignore, matching existing behavior.
        return;
    }
    // Hidden Tumblr blogs need a different blog subtype; the detector call does
    // a network probe, hence it is awaited outside the lock below.
    if ((blog.GetType() == typeof(TumblrBlog)) && await TumblrBlogDetector.IsHiddenTumblrBlog(blog.Url))
    {
        blog = PromoteTumblrBlogToHiddenBlog(blog);
    }
    lock (lockObject)
    {
        // Duplicate check, save and registration happen under the same lock so
        // two concurrent adds of the same blog cannot both pass the check.
        if (managerService.BlogFiles.Any(blogs => blogs.Name.Equals(blog.Name) && blogs.BlogType.Equals(blog.BlogType)))
        {
            shellService.ShowError(null, Resources.BlogAlreadyExist, blog.Name);
            return;
        }
        if (blog.Save())
        {
            AddToManager(blog);
        }
    }
    // NOTE(review): settings transfer and metadata fetch run after the blog is
    // already saved/registered — presumably later updates re-persist; confirm.
    blog = settingsService.TransferGlobalSettingsToBlog(blog);
    ICrawler crawler = CrawlerFactory.GetCrawler(blog, new CancellationToken(), new PauseToken(), new Progress <DownloadProgress>(), shellService, crawlerService, managerService);
    await crawler.UpdateMetaInformationAsync();
}
/// <summary>
/// Loads all blog index files from the download location into the manager,
/// raises BlogManagerFinishedLoading, and — when the startup setting is
/// enabled — checks each loaded blog's online status sequentially.
/// </summary>
private async Task LoadLibrary()
{
    Logger.Verbose("ManagerController.LoadLibrary:Start");
    managerService.BlogFiles.Clear();
    string indexPath = Path.Combine(shellService.Settings.DownloadLocation, "Index");
    try
    {
        if (Directory.Exists(indexPath))
        {
            IReadOnlyList<IBlog> blogs = await GetIBlogsAsync(indexPath);
            foreach (IBlog blog in blogs)
            {
                managerService.BlogFiles.Add(blog);
            }

            BlogManagerFinishedLoading?.Invoke(this, EventArgs.Empty);

            if (shellService.Settings.CheckOnlineStatusAtStartup)
            {
                foreach (IBlog blog in blogs)
                {
                    ICrawler crawler = CrawlerFactory.GetCrawler(
                        blog, new CancellationToken(), new PauseToken(),
                        new Progress<DownloadProgress>(), shellService, crawlerService, managerService);
                    await crawler.IsBlogOnlineAsync();
                }
            }
        }
    }
    catch (Exception ex)
    {
        Logger.Verbose("ManagerController:LoadLibrary: {0}", ex);
        shellService.ShowError(ex, Resources.CouldNotLoadLibrary, ex.Data["Filename"]);
    }
    Logger.Verbose("ManagerController.LoadLibrary:End");
}
// Lazily builds the leecher list from configuration: one crawler per configured
// leecher element, one leecher instance per configured execution. No-op after
// the first call.
private void initializeLeechers()
{
    if (_leechers != null)
    {
        return;
    }

    _leechers = new List<LeecherBase>();
    foreach (LeecherElement leecherConfiguration in AcroniManagerConfigurationSection.Instance.Leechers)
    {
        Type leecherType = ConfigurableBase.GetType(leecherConfiguration.Class, typeof(LeecherBase), "leecher");
        Crawler crawler = CrawlerFactory.GetCrawler(leecherConfiguration);

        foreach (ExecutionElement executionConfiguration in leecherConfiguration.Executions)
        {
            ValidateConfiguration(executionConfiguration);
            LeecherBase leecher = ConfigurableBase.CreateConfigurableItem<LeecherBase>(leecherType, executionConfiguration.Parameters);
            leecher.Execution = ExecutionFactory.GetExecution(executionConfiguration, crawler);
            _leechers.Add(leecher);
        }
    }
}
/// <summary>
/// Checks the online status of the currently selected blogs in parallel,
/// capped at 25 concurrent checks.
/// </summary>
private async Task CheckStatusAsync()
{
    await Task.Run(async () =>
    {
        using (var semaphoreSlim = new SemaphoreSlim(25))
        {
            // Snapshot the selection so changes during the check don't affect iteration.
            IEnumerable<IBlog> blogs = selectionService.SelectedBlogFiles.ToArray();
            IEnumerable<Task> tasks = blogs.Select(async blog =>
            {
                await semaphoreSlim.WaitAsync();
                try
                {
                    ICrawler crawler = CrawlerFactory.GetCrawler(blog, new CancellationToken(), new PauseToken(), new Progress<DownloadProgress>(), shellService, crawlerService, managerService);
                    await crawler.IsBlogOnlineAsync();
                }
                finally
                {
                    // Release must run even if IsBlogOnlineAsync throws; otherwise a
                    // faulted check permanently consumes a slot and the remaining
                    // queued checks never run, leaving Task.WhenAll hanging.
                    semaphoreSlim.Release();
                }
            });
            await Task.WhenAll(tasks);
        }
    });
}