Пример #1
0
        /// <summary>
        /// Console entry point. Initializes logging from the log4net.xml file located in the
        /// application base directory, reads the JSON file named by the "ConfigurationFile"
        /// app setting, and runs one crawler per deserialized <c>SiteParameter</c>, in file order.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            string log4netPath = AppDomain.CurrentDomain.BaseDirectory + "log4net.xml";
            LogHelper.Initialize(log4netPath);

            string configPath = ConfigurationManager.AppSettings["ConfigurationFile"];
            if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath))
            {
                Console.WriteLine("Configuration file missing. \nPress any key to exit...");
                Console.ReadKey();
                return;
            }

            string config = File.ReadAllText(configPath);
            List<SiteParameter> siteParameters = JsonConvert.DeserializeObject<List<SiteParameter>>(config);

            // An empty file or a literal JSON "null" deserializes to null; without this guard
            // the loop below would fail with a NullReferenceException.
            if (siteParameters == null)
            {
                LogHelper.WriteInfo("Configuration file contains no site parameters; nothing to crawl.");
                siteParameters = new List<SiteParameter>();
            }

            foreach (var parameter in siteParameters)
            {
                LogHelper.WriteInfo($"Starting crawler for {parameter.MerchantNamePattern}");
                var crawler = CrawlerFactory.Create(parameter);
                crawler.Crawl();
                LogHelper.WriteInfo($"Crawling {parameter.MerchantNamePattern} done.");
            }

#if DEBUG
            // Keep the console window open in debug builds so the output can be read.
            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
#endif
        }
        /// <summary>
        /// Asks <c>CrawlerFactory</c> for a crawler matching <paramref name="url"/> and, when one
        /// is returned, runs it against the current <c>browser.Document</c>. Does nothing when
        /// the factory returns null.
        /// </summary>
        /// <param name="url">URL used to select the crawler.</param>
        void crawl(string url)
        {
            // Parse the DOM elements
            Crawler pageCrawler = CrawlerFactory.getCrawler(url);

            if (pageCrawler == null)
            {
                return;
            }

            pageCrawler.crawl(browser.Document);
        }
Пример #3
0
        /// <summary>
        /// Console entry point. Initializes the crawler database helper from the "MySqlConStr"
        /// connection string, configures logging from app settings, reads the JSON file named by
        /// the "ConfigurationFile" app setting, and runs one crawler per deserialized
        /// <c>SiteParameter</c>, in file order.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            string connectionString = ConfigurationManager.ConnectionStrings["MySqlConStr"].ConnectionString;

            CrawlerDbHelper.Init(connectionString);
            using (var context = CrawlerDbHelper.GetContext())
            {
                if (!context.Database.Exists())
                {
                    // TODO: Create database here
                }
            }

            string logLevel = ConfigurationManager.AppSettings["MultiLogger.LogLevel"];

            if (!string.IsNullOrWhiteSpace(logLevel))
            {
                // Enum.TryParse overwrites its out argument with default(LogType) when parsing
                // fails, so the Information fallback has to be applied after the call rather
                // than through the variable's initial value.
                LogType level;
                if (!Enum.TryParse<LogType>(logLevel, out level))
                {
                    level = LogType.Information;
                }

                Logging.LogLevel = level;
            }

            string fileLoggerPath = ConfigurationManager.AppSettings["FileLogger.Path"];

            if (!string.IsNullOrWhiteSpace(fileLoggerPath))
            {
                FileLogger fileLogger = new FileLogger(fileLoggerPath);
                Logging.Loggers.Add(fileLogger);
            }

            string configPath = ConfigurationManager.AppSettings["ConfigurationFile"];

            if (string.IsNullOrWhiteSpace(configPath) || !File.Exists(configPath))
            {
                Console.WriteLine("Configuration file missing. \nPress any key to exit...");
                Console.ReadKey();
                return;
            }

            string config = File.ReadAllText(configPath);
            List<SiteParameter> siteParameters = JsonConvert.DeserializeObject<List<SiteParameter>>(config);

            // An empty file or a literal JSON "null" deserializes to null; without this guard
            // the loop below would fail with a NullReferenceException.
            if (siteParameters == null)
            {
                Logging.WriteEntry("Main", LogType.Information, "Configuration file contains no site parameters; nothing to crawl.");
                siteParameters = new List<SiteParameter>();
            }

            foreach (var parameter in siteParameters)
            {
                Logging.WriteEntry("Main", LogType.Information, $"Starting crawler for {parameter.SiteName}");
                var crawler = CrawlerFactory.Create(parameter);
                crawler.Crawl(parameter);
                Logging.WriteEntry("Main", LogType.Information, $"Crawling {parameter.SiteName} done.");
            }

            #if DEBUG
            // Keep the console window open in debug builds so the output can be read.
            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
            #endif
        }
Пример #4
0
        /// <summary>
        /// Builds a <c>CrawlerFactory</c> from the arguments "--termo" / "--tipo 0" and asserts
        /// that the crawler it produces returns exactly one image.
        /// </summary>
        public void FAZER_ESSE_METODO()
        {
            var args = new string[]
            {
                "--termo", "sem você a vida é tão", "--tipo", "0"
            };

            var factory = new CrawlerFactory(args);

            // The method signature is synchronous, so the async call is awaited by blocking.
            var images = factory.GetCrawler().GetImagesAsync().GetAwaiter().GetResult();

            // Expected value first, actual second, so a failure message reports them correctly.
            Assert.AreEqual(1, images.Count);
        }
Пример #5
0
        /// <summary>
        /// Builds a <c>CrawlerFactory</c> from the arguments "--termo" / "--tipo 1" and asserts
        /// that the crawler it produces returns exactly one image.
        /// </summary>
        public void Dropbox_Uma_Partitura()
        {
            // The factory takes a string[]; build the array directly instead of going
            // through an intermediate list.
            var args = new string[]
            {
                "--termo", "sem você a vida é tão", "--tipo", "1"
            };

            var factory = new CrawlerFactory(args);

            // The method signature is synchronous, so the async call is awaited by blocking.
            var images = factory.GetCrawler().GetImagesAsync().GetAwaiter().GetResult();

            // Expected value first, actual second, so a failure message reports them correctly.
            Assert.AreEqual(1, images.Count);
        }
Пример #6
0
 /// <summary>
 /// When the CheckOnlineStatusAtStartup setting is enabled, creates a crawler for every blog
 /// in <c>managerService.BlogFiles</c> and awaits its online check. The loop runs inside
 /// <c>Task.Run</c> and checks the blogs one after another.
 /// </summary>
 private async Task CheckBlogsOnlineStatus()
 {
     if (!shellService.Settings.CheckOnlineStatusAtStartup)
     {
         return;
     }

     await Task.Run(async() =>
     {
         IEnumerable<IBlog> knownBlogs = managerService.BlogFiles;

         foreach (IBlog current in knownBlogs)
         {
             // Fresh, never-signalled tokens and a throwaway progress sink are passed
             // for each crawler.
             var cancellation = new CancellationToken();
             var pause = new PauseToken();
             var progress = new Progress<DownloadProgress>();

             ICrawler statusCrawler = CrawlerFactory.GetCrawler(current, cancellation, pause, progress,
                                                                shellService, crawlerService, managerService);
             await statusCrawler.IsBlogOnlineAsync();
         }
     });
 }
Пример #7
0
        /// <summary>
        /// Creates a blog for the given URL, checks it online through its crawler, refreshes its
        /// meta information, and then saves it and queues it for addition to
        /// <c>managerService.BlogFiles</c>, unless a blog with the same name and type is already listed.
        /// </summary>
        /// <param name="blogUrl">
        /// URL of the blog to add. When null or empty, <c>crawlerService.NewBlogUrl</c> is used.
        /// </param>
        private async Task AddBlogAsync(string blogUrl)
        {
            string effectiveUrl = string.IsNullOrEmpty(blogUrl) ? crawlerService.NewBlogUrl : blogUrl;

            // TODO: Dependency, not SOLID!
            IBlog blog;

            try
            {
                // The path is built inside the try so an ArgumentException from either call
                // aborts the add silently.
                string indexLocation = Path.Combine(shellService.Settings.DownloadLocation, "Index");
                blog = BlogFactory.GetBlog(effectiveUrl, indexLocation);
            }
            catch (ArgumentException)
            {
                return;
            }

            blog = settingsService.TransferGlobalSettingsToBlog(blog);

            ICrawler crawler = CrawlerFactory.GetCrawler(
                blog.BlogType,
                new CancellationToken(),
                new PauseToken(),
                new Progress<DownloadProgress>(),
                shellService,
                crawlerService,
                blog);
            await crawler.IsBlogOnlineAsync();

            if (CheckIfTumblrPrivateBlog(blog))
            {
                // The promoted blog needs a crawler created for its new blog type.
                blog = PromoteTumblrBlogToPrivateBlog(blog);
                crawler = CrawlerFactory.GetCrawler(
                    blog.BlogType,
                    new CancellationToken(),
                    new PauseToken(),
                    new Progress<DownloadProgress>(),
                    shellService,
                    crawlerService,
                    blog);
            }

            await crawler.UpdateMetaInformationAsync();

            lock (lockObject)
            {
                bool alreadyListed = managerService.BlogFiles.Any(
                    existing => existing.Name.Equals(blog.Name) && existing.BlogType.Equals(blog.BlogType));

                if (alreadyListed)
                {
                    shellService.ShowError(null, Resources.BlogAlreadyExist, blog.Name);
                }
                else if (blog.Save())
                {
                    QueueOnDispatcher.CheckBeginInvokeOnUI((Action)(() => managerService.BlogFiles.Add(blog)));
                }
            }
        }
Пример #8
0
 /// <summary>
 /// When the CheckOnlineStatusOnStartup setting is enabled, checks the online status of every
 /// blog in <c>managerService.BlogFiles</c>, running at most 25 checks at the same time.
 /// </summary>
 private async Task CheckBlogsOnlineStatusAsync()
 {
     if (shellService.Settings.CheckOnlineStatusOnStartup)
     {
         await Task.Run(async() =>
         {
             // Caps the number of concurrently running online checks.
             var semaphoreSlim        = new SemaphoreSlim(25);
             IEnumerable<IBlog> blogs = managerService.BlogFiles;
             IEnumerable<Task> tasks  = blogs.Select(async blog =>
             {
                 await semaphoreSlim.WaitAsync();
                 try
                 {
                     ICrawler crawler = CrawlerFactory.GetCrawler(blog, new CancellationToken(), new PauseToken(),
                                                                  new Progress<DownloadProgress>(), shellService, crawlerService, managerService);
                     await crawler.IsBlogOnlineAsync();
                 }
                 finally
                 {
                     // Release even when the check throws; otherwise each failure would
                     // permanently consume a slot and, once all slots are gone, the
                     // remaining checks would wait forever.
                     semaphoreSlim.Release();
                 }
             });
             await Task.WhenAll(tasks);
         });
     }
 }
Пример #9
0
        /// <summary>
        /// Creates a blog for the given URL, saves it and hands it to <c>AddToManager</c> unless a
        /// blog with the same name and type is already listed, then applies the global settings
        /// and refreshes the blog's meta information through its crawler.
        /// </summary>
        /// <param name="blogUrl">
        /// URL of the blog to add. When null or empty, <c>crawlerService.NewBlogUrl</c> is used.
        /// </param>
        private async Task AddBlogAsync(string blogUrl)
        {
            string effectiveUrl = string.IsNullOrEmpty(blogUrl) ? crawlerService.NewBlogUrl : blogUrl;

            IBlog blog;

            try
            {
                // The path is built inside the try so an ArgumentException from either call
                // aborts the add silently.
                string indexLocation = Path.Combine(shellService.Settings.DownloadLocation, "Index");
                blog = BlogFactory.GetBlog(effectiveUrl, indexLocation);
            }
            catch (ArgumentException)
            {
                return;
            }

            // Exact type match only; the hidden-blog detection runs only for that type.
            bool isPlainTumblrBlog = blog.GetType() == typeof(TumblrBlog);
            if (isPlainTumblrBlog && await TumblrBlogDetector.IsHiddenTumblrBlog(blog.Url))
            {
                blog = PromoteTumblrBlogToHiddenBlog(blog);
            }

            lock (lockObject)
            {
                bool alreadyListed = managerService.BlogFiles.Any(
                    existing => existing.Name.Equals(blog.Name) && existing.BlogType.Equals(blog.BlogType));

                if (alreadyListed)
                {
                    shellService.ShowError(null, Resources.BlogAlreadyExist, blog.Name);
                    return;
                }

                if (blog.Save())
                {
                    AddToManager(blog);
                }
            }

            blog = settingsService.TransferGlobalSettingsToBlog(blog);

            ICrawler metaCrawler = CrawlerFactory.GetCrawler(
                blog,
                new CancellationToken(),
                new PauseToken(),
                new Progress<DownloadProgress>(),
                shellService,
                crawlerService,
                managerService);
            await metaCrawler.UpdateMetaInformationAsync();
        }
Пример #10
0
        /// <summary>
        /// Clears <c>managerService.BlogFiles</c> and repopulates it from the "Index" folder under
        /// the configured download location, raises <c>BlogManagerFinishedLoading</c>, and, when
        /// the CheckOnlineStatusAtStartup setting is enabled, checks each loaded blog's online
        /// status in turn. Any exception is logged and reported through <c>shellService.ShowError</c>.
        /// </summary>
        private async Task LoadLibrary()
        {
            Logger.Verbose("ManagerController.LoadLibrary:Start");
            managerService.BlogFiles.Clear();
            string indexPath = Path.Combine(shellService.Settings.DownloadLocation, "Index");

            try
            {
                if (Directory.Exists(indexPath))
                {
                    IReadOnlyList<IBlog> loadedBlogs = await GetIBlogsAsync(indexPath);

                    foreach (IBlog loaded in loadedBlogs)
                    {
                        managerService.BlogFiles.Add(loaded);
                    }

                    BlogManagerFinishedLoading?.Invoke(this, EventArgs.Empty);

                    if (shellService.Settings.CheckOnlineStatusAtStartup)
                    {
                        foreach (IBlog candidate in loadedBlogs)
                        {
                            ICrawler statusCrawler = CrawlerFactory.GetCrawler(
                                candidate,
                                new CancellationToken(),
                                new PauseToken(),
                                new Progress<DownloadProgress>(),
                                shellService,
                                crawlerService,
                                managerService);
                            await statusCrawler.IsBlogOnlineAsync();
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Verbose("ManagerController:LoadLibrary: {0}", ex);
                // The "Filename" entry of ex.Data is passed as the message argument.
                shellService.ShowError(ex, Resources.CouldNotLoadLibrary, ex.Data["Filename"]);
            }

            Logger.Verbose("ManagerController.LoadLibrary:End");
        }
Пример #11
0
        /// <summary>
        /// Builds the <c>_leechers</c> list on first use: for every configured leecher element
        /// it resolves the leecher type and a crawler, then creates one leecher instance per
        /// configured execution. Does nothing when <c>_leechers</c> is already set.
        /// </summary>
        private void initializeLeechers()
        {
            if (_leechers != null)
            {
                return;
            }

            _leechers = new List<LeecherBase>();

            foreach (LeecherElement leecherElement in AcroniManagerConfigurationSection.Instance.Leechers)
            {
                Type resolvedType = ConfigurableBase.GetType(leecherElement.Class, typeof(LeecherBase), "leecher");

                // One crawler per leecher element, shared by all of that element's executions.
                Crawler sharedCrawler = CrawlerFactory.GetCrawler(leecherElement);

                foreach (ExecutionElement executionElement in leecherElement.Executions)
                {
                    ValidateConfiguration(executionElement);

                    LeecherBase created = ConfigurableBase.CreateConfigurableItem<LeecherBase>(resolvedType, executionElement.Parameters);
                    created.Execution = ExecutionFactory.GetExecution(executionElement, sharedCrawler);

                    _leechers.Add(created);
                }
            }
        }
Пример #12
0
 /// <summary>
 /// Checks the online status of every currently selected blog, running at most 25 checks
 /// at the same time. The selection is copied to an array before the checks start.
 /// </summary>
 private async Task CheckStatusAsync()
 {
     await Task.Run(async() =>
     {
         // Caps the number of concurrently running online checks.
         var semaphoreSlim        = new SemaphoreSlim(25);
         IEnumerable<IBlog> blogs = selectionService.SelectedBlogFiles.ToArray();
         IEnumerable<Task> tasks  = blogs.Select(async blog =>
         {
             await semaphoreSlim.WaitAsync();
             try
             {
                 ICrawler crawler = CrawlerFactory.GetCrawler(blog, new CancellationToken(), new PauseToken(),
                                                              new Progress<DownloadProgress>(), shellService, crawlerService, managerService);
                 await crawler.IsBlogOnlineAsync();
             }
             finally
             {
                 // Release even when the check throws; otherwise each failure would
                 // permanently consume a slot and, once all slots are gone, the
                 // remaining checks would wait forever.
                 semaphoreSlim.Release();
             }
         });
         await Task.WhenAll(tasks);
     });
 }