public async Task CrawlService_CanCrawlFromChannelIdAsync()
{
    var video = new YouTubeVideo() { Id = "id", MentionedVideos = new string[] { } };
    var videoIds = new[] { "id" };

    var youtubeMock = new Mock<IYouTubeApi>();
    youtubeMock
        .Setup(s => s.GetVideoById("id"))
        .ReturnsAsync(video);
    youtubeMock
        .Setup(s => s.GetVideoIdsByChannelId("channel_id"))
        .ReturnsAsync(videoIds.AsEnumerable());

    var dbMock = new Mock<IRepository<YouTubeVideo>>();
    dbMock.Setup(db => db.Contains("id")).ReturnsAsync(false);

    var crawler = new CrawlerService(
        youtubeMock.Object,
        dbMock.Object,
        Mock.Of<ILogger<CrawlerService>>());

    await crawler.CrawlChannelAsync("channel_id", 1);

    youtubeMock.Verify(y => y.GetVideoIdsByChannelId("channel_id"));
    youtubeMock.Verify(y => y.GetVideoById("id"));
    dbMock.Verify(m => m.Contains("id"));
    dbMock.Verify(m => m.Set(video, "id"));
}
public async Task CrawlService_CanRetrieveInfoFromSingleVideoId()
{
    var video = new YouTubeVideo() { Id = "id", MentionedVideos = new[] { "mentioned_id" } };
    var linkedVideo = new YouTubeVideo() { Id = "mentioned_id", MentionedVideos = Enumerable.Empty<string>() };

    var youtubeMock = new Mock<IYouTubeApi>();
    youtubeMock
        .Setup(s => s.GetVideoById("mentioned_id"))
        .Returns(Task.FromResult(linkedVideo));

    var dbMock = new Mock<IRepository<YouTubeVideo>>();

    var crawler = new CrawlerService(
        youtubeMock.Object,
        dbMock.Object,
        Mock.Of<ILogger<CrawlerService>>());

    await crawler.Crawl(video, 2);

    youtubeMock.Verify(m => m.GetVideoById("mentioned_id"));
    dbMock.Verify(m => m.Contains("id"));
    dbMock.Verify(m => m.Set(video, "id"));
    dbMock.Verify(m => m.Set(linkedVideo, "mentioned_id"));
}
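The two crawl tests above mock collaborators that this listing does not include. A minimal sketch of the shapes those mocks assume, with member signatures inferred purely from the Setup/Verify calls (treat them as hypothetical, not the project's actual declarations):

// Hypothetical shapes inferred from the mock setups above; the real project
// may declare additional members or different return types.
public class YouTubeVideo
{
    public string Id { get; set; }
    public IEnumerable<string> MentionedVideos { get; set; }
}

public interface IYouTubeApi
{
    Task<YouTubeVideo> GetVideoById(string id);
    Task<IEnumerable<string>> GetVideoIdsByChannelId(string channelId);
}

public interface IRepository<T>
{
    Task<bool> Contains(string key);
    Task Set(T item, string key);
}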
public IHttpActionResult GTX1660([FromBody] DTOProdutoPesquisa pesquisa)
{
    try
    {
        CrawlerService crawlerService = new CrawlerService();
        var produtos = crawlerService.PesquisarGTX1660(pesquisa);
        var retornoApi = new
        {
            data = produtos,
            message = produtos.Count > 0
                ? "Encontramos preços do(a) GTX 1660"
                : "Não encontramos preços do(a) 1660"
        };

        if (produtos.Count() > 0)
        {
            return Content(HttpStatusCode.Found, retornoApi);
        }
        else
        {
            return Content(HttpStatusCode.NoContent, retornoApi);
        }
    }
    catch (Exception ex)
    {
        return InternalServerError(ex);
    }
}
public virtual async Task<List<(string, DateTime?)>> GetMovieUrlsByPage(int currentPage)
{
    string url = string.Format(PageUrl, currentPage);
    var pageResult = new List<(string, DateTime?)>();
    var movieCrawler = new CrawlerService();
    movieCrawler.OnStart += (s, e) =>
    {
        Console.WriteLine($"开始抓取第{currentPage}页");
    };
    movieCrawler.OnError += (s, e) =>
    {
        Console.WriteLine($"第{currentPage}页,抓取出错了,地址【{e.Uri}】", e.Exception);
        Thread.Sleep(1000 * 5); // pause briefly
    };
    movieCrawler.OnCompleted += (s, e) =>
    {
        pageResult = PageMovieRegex.Matches(e.PageSource)
            .Select(m => (Domain + m.Groups[1].Value, m.Groups[2]?.Value?.ToDateTime()))
            .ToList();
        if (pageResult.Count == 0)
        {
            Console.WriteLine($"第{currentPage}页未抓取到数据,地址【{e.Uri}】");
            Thread.Sleep(1000 * 5); // pause briefly
        }
    };

    if (Website == SysMovieWebsite.ZYkuyun)
    {
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        await movieCrawler.Start(new Uri(url), Encoding.GetEncoding("GB2312"));
    }
    else
    {
        await movieCrawler.Start(new Uri(url));
    }
    return pageResult;
}
private static void StartCrawling(object state)
{
    using (var context = new Context("name=MySql"))
    {
        var unitOfWork = new UnitOfWork(context);
        var crawlerService = new CrawlerService(unitOfWork);
        var rawHtmlService = new RawHtmlService(unitOfWork);

        foreach (var crawlableUrl in crawlerService.GetCrawlableUrls()
            .Where(x => x.IsActivated == true))
        {
            Uri uri = null;
            if (Uri.TryCreate(crawlableUrl.RawUrl, UriKind.Absolute, out uri))
            {
                var rawHtmlTask = GetRawHtmlAsync(uri);
                rawHtmlTask.Wait();
                var rawHtml = rawHtmlTask.Result;

                if (rawHtml.Length <= 0)
                {
                    Console.WriteLine("rawHtml size is too small: " + crawlableUrl.RawUrl);
                }
                else
                {
                    rawHtmlService.SaveRawHtmlAsByteArray(rawHtmlTask.Result, crawlableUrl.RawUrlId);
                    Console.WriteLine("crawling was successful: " + crawlableUrl.RawUrl);
                }
            }
            else
            {
                Console.WriteLine("cannot create uri: " + crawlableUrl.RawUrl);
            }
        }
    }
}
public static async Task<string> GetSentenceAsync(SentenceModo? sentenceModo)
{
    try
    {
        // Open a new page in the crawler
        page = await CrawlerService.OpenAsync("https://lerolero.com/");

        // If a sentence mode was provided, select it
        if (sentenceModo.HasValue)
        {
            await SelectSentenceModo(sentenceModo.Value);
        }

        // Read the sentence value
        string setence = await ReadSentenceAsync();

        // Request shutdown of the browser instance
        await CrawlerService.CloseAsync();

        return setence;
    }
    catch (System.Exception)
    {
        return SentenceServices.GetSentenceByJson();
    }
}
public void Test_Get_Search_Results_Without_Selected_Client_Setting()
{
    // ARRANGE
    _appSettings = new AppSettings { SelectedHttpClient = "" };
    _mockOptions.Setup(_ => _.Value).Returns(_appSettings);

    var expectedSearchResultString = "<li class=\"b_algo\"><h2><a href=\"http://www.infotrack.com.au\"></a></h2>";
    var httpResponseMessage = new HttpResponseMessage
    {
        StatusCode = HttpStatusCode.OK,
        Content = new StringContent(expectedSearchResultString)
    };
    var handlerMockObject = GetHandlerMock(httpResponseMessage);
    var mockIHttpClientFactoryObject = GetMockIHttpClientFactory(handlerMockObject);

    // ACT
    ICrawlerService crawlerService = new CrawlerService(mockIHttpClientFactoryObject, _logger.Object, _mockConfig.Object, _mockOptions.Object);

    // ASSERT
    Assert.ThrowsAsync<ArgumentException>(
        async () => await crawlerService.GetSearchResults("online title search"),
        "Default HttpClient should be set in the config");
}
public IHttpActionResult Email([FromBody] DTONotificacaoProduto notificacaoProduto)
{
    try
    {
        EmailNotification notification = new EmailNotification();
        NotificacaoProdutoService notifyServ = new NotificacaoProdutoService();
        DestinarioService destinarioService = new DestinarioService();
        CrawlerService crawler = new CrawlerService();

        var produtosParaNotificar = notifyServ.ListarNotificacoes(notificacaoProduto.EmailDestinario);
        var destinario = destinarioService.GetDestinario(notificacaoProduto.EmailDestinario);

        List<Produto> listaProdutos = new List<Produto>();
        foreach (var produtos in produtosParaNotificar)
        {
            var listaProdutosIteracao = crawler.PesquisarProduto(new DTOProdutoPesquisa
            {
                produto = produtos.NomeProduto,
                valor_produto_min = produtos.ValorMinProduto,
                valor_produto_max = produtos.ValorMaxProduto
            }).Take(4);
            listaProdutosIteracao.ForEach(x => listaProdutos.Add(x));
        }

        notification.Notificar(listaProdutos, destinario);

        return Ok(new { message = "E-mail enviado com sucesso." });
    }
    catch (Exception ex)
    {
        return InternalServerError(ex);
    }
}
public async Task Test_Get_Search_Results_Success()
{
    // ARRANGE
    var expectedSearchResultString = "<li class=\"b_algo\"><h2><a href=\"http://www.infotrack.com.au\"></a></h2>";
    var httpResponseMessage = new HttpResponseMessage
    {
        StatusCode = HttpStatusCode.OK,
        Content = new StringContent(expectedSearchResultString)
    };
    var handlerMockObject = GetHandlerMock(httpResponseMessage);
    var mockIHttpClientFactoryObject = GetMockIHttpClientFactory(handlerMockObject);

    // ACT
    ICrawlerService crawlerService = new CrawlerService(mockIHttpClientFactoryObject, _logger.Object, _mockConfig.Object, _mockOptions.Object);
    var actualSearchResults = await crawlerService.GetSearchResults("online title search");

    // ASSERT
    Assert.That(actualSearchResults, Is.Not.Empty);
    Assert.That(actualSearchResults, Is.EqualTo(expectedSearchResultString));
}
public virtual async Task<int> GetPageCount()
{
    int pageCount = 0;
    var movieCrawler = new CrawlerService();
    movieCrawler.OnStart += (s, e) =>
    {
        Console.WriteLine($"开始抓取总页数");
    };
    movieCrawler.OnError += (s, e) =>
    {
        Console.WriteLine($"总页数抓取出错了,地址【{e.Uri}】", e.Exception);
        Thread.Sleep(1000 * 5); // pause briefly
    };
    movieCrawler.OnCompleted += (s, e) =>
    {
        var match = PageCountRegex.Match(e.PageSource);
        pageCount = match.Groups[1].Value.ToInt();
    };

    if (Website == SysMovieWebsite.ZYkuyun)
    {
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        await movieCrawler.Start(new Uri(Domain + "list/?0.html"), Encoding.GetEncoding("GB2312"));
    }
    else
    {
        await movieCrawler.Start(new Uri(Domain));
    }
    return pageCount;
}
public void Test_Get_Search_Results_Not_Success_Status_Code()
{
    // ARRANGE
    var expectedSearchResultString = "<li class=\"b_algo\"><h2><a href=\"http://www.infotrack.com.au\"></a></h2>";
    var httpResponseMessage = new HttpResponseMessage
    {
        StatusCode = HttpStatusCode.BadRequest,
        Content = new StringContent(expectedSearchResultString)
    };
    var handlerMockObject = GetHandlerMock(httpResponseMessage);
    var mockIHttpClientFactoryObject = GetMockIHttpClientFactory(handlerMockObject);

    // ACT
    ICrawlerService crawlerService = new CrawlerService(mockIHttpClientFactoryObject, _logger.Object, _mockConfig.Object, _mockOptions.Object);

    // ASSERT
    Assert.ThrowsAsync<HttpRequestException>(
        async () => await crawlerService.GetSearchResults("online title search"),
        "Bad Request");
}
static void Main(string[] args)
{
    var argumentsFactory = new ArgumentsFactory(args,
        ArgumentsConstants.FILE_SIZE_ARGUMENT,
        ArgumentsConstants.BUFFER_LENGTH_ARGUMENT,
        ArgumentsConstants.PATH_ARGUMENT);

    var fileSizeArgument = argumentsFactory.CreateArgument<FileSizeArgument>(ArgumentsConstants.FILE_SIZE_ARGUMENT);
    var bufferLengthArgument = argumentsFactory.CreateArgument<BufferLengthArgument>(ArgumentsConstants.BUFFER_LENGTH_ARGUMENT);
    var pathArgument = argumentsFactory.CreateArgument<PathArgument>(ArgumentsConstants.PATH_ARGUMENT);

    Console.WriteLine("Starting application...");

    var crawler = new CrawlerService(new SeleniumService())
        .Setup();
    var crawlerStatement = new CrawlerStatement(crawler);
    var sentence = crawlerStatement.GetSentence();
    var bytesCount = crawlerStatement.GetBytesCount(sentence);

    Console.WriteLine("Starting to write in the file using buffer...");

    var writeBuffer = new WriteBuffer(bufferLengthArgument.GetValue())
        .StringInput(sentence)
        .BytesCount(bytesCount);

    var report = new FileService(pathArgument.GetValue(), fileSizeArgument.GetValue())
        .WriteUsingBufferUntilEnd(writeBuffer)
        .Report();

    Console.WriteLine("Writing to file successfully completed...");
    report.Print();
}
public void Test_Get_Search_Results_Without_Providing_Search_Terms()
{
    // ARRANGE
    var expectedSearchResultString = "<li class=\"b_algo\"><h2><a href=\"http://www.infotrack.com.au\"></a></h2>";
    var httpResponseMessage = new HttpResponseMessage
    {
        StatusCode = HttpStatusCode.BadRequest,
        Content = new StringContent(expectedSearchResultString)
    };
    var handlerMockObject = GetHandlerMock(httpResponseMessage);
    var mockIHttpClientFactoryObject = GetMockIHttpClientFactory(handlerMockObject);

    // ACT
    ICrawlerService crawlerService = new CrawlerService(mockIHttpClientFactoryObject, _logger.Object, _mockConfig.Object, _mockOptions.Object);

    // ASSERT
    Assert.ThrowsAsync<ArgumentException>(
        async () => await crawlerService.GetSearchResults(""),
        "Search terms must be provided!");
}
private static void StartJobs(IWebHost webHost)
{
    var adapter = (IBotFrameworkHttpAdapter)webHost
        .Services
        .GetService(typeof(IBotFrameworkHttpAdapter));
    var storage = (IStorage)webHost
        .Services
        .GetService(typeof(IStorage));
    var configuration = (IConfiguration)webHost
        .Services
        .GetService(typeof(IConfiguration));
    var notifierLogger = (ILogger<NotifyService>)webHost
        .Services
        .GetService(typeof(ILogger<NotifyService>));
    var crawlerLogger = (ILogger<CrawlerService>)webHost
        .Services
        .GetService(typeof(ILogger<CrawlerService>));

    // notifier
    var notifier = new NotifyService(adapter, configuration["MicrosoftAppId"], storage, notifierLogger);
    Task.Factory.StartNew(notifier.Run, TaskCreationOptions.LongRunning);

    // crawler
    var crawler = new CrawlerService(storage, crawlerLogger);
    Task.Factory.StartNew(crawler.Run, TaskCreationOptions.LongRunning);
}
// GET: /<controller>/
public CrawlerController(IMapper _mapper)
{
    crawlerService = new CrawlerService();
    mapper = _mapper;
    logger = NLog.Web.NLogBuilder.ConfigureNLog(AppConfiguration.NLogPath).GetCurrentClassLogger();
}
public async Task Test_Get_Search_Results_With_Max_Search_Search_Results_Config()
{
    // ARRANGE
    var expectedSearchResultString = "<li class=\"b_algo\"><h2><a href=\"http://www.infotrack.com.au\"></a></h2>";
    _mockConfig.SetupGet(c => c[$"HttpClientSettings:{_appSettings.SelectedHttpClient}:MaxSearchResults"])
        .Returns("100")
        .Verifiable();

    var httpResponseMessage = new HttpResponseMessage
    {
        StatusCode = HttpStatusCode.OK,
        Content = new StringContent(expectedSearchResultString)
    };
    var handlerMockObject = GetHandlerMock(httpResponseMessage);
    var mockIHttpClientFactoryObject = GetMockIHttpClientFactory(handlerMockObject);

    // ACT
    ICrawlerService crawlerService = new CrawlerService(mockIHttpClientFactoryObject, _logger.Object, _mockConfig.Object, _mockOptions.Object);
    var searchResults = await crawlerService.GetSearchResults("online title search");

    // ASSERT
    _mockConfig.Verify(mock => mock[$"HttpClientSettings:{_appSettings.SelectedHttpClient}:MaxSearchResults"], Times.Once);
}
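The GetSearchResults tests above call GetHandlerMock and GetMockIHttpClientFactory helpers that this listing does not include. A minimal sketch of what such helpers could look like using Moq's Protected() API, assuming they return the mocked HttpMessageHandler and IHttpClientFactory instances the CrawlerService constructor expects (the names and signatures are inferred from the call sites, not taken from the original fixture):

// Hypothetical helper sketch; requires: using Moq; using Moq.Protected;
// using System.Net.Http; using System.Threading; using System.Threading.Tasks;
private static HttpMessageHandler GetHandlerMock(HttpResponseMessage response)
{
    var handlerMock = new Mock<HttpMessageHandler>();
    handlerMock
        .Protected()
        .Setup<Task<HttpResponseMessage>>(
            "SendAsync",
            ItExpr.IsAny<HttpRequestMessage>(),
            ItExpr.IsAny<CancellationToken>())
        .ReturnsAsync(response);
    return handlerMock.Object;
}

private static IHttpClientFactory GetMockIHttpClientFactory(HttpMessageHandler handler)
{
    var factoryMock = new Mock<IHttpClientFactory>();
    factoryMock
        .Setup(f => f.CreateClient(It.IsAny<string>()))
        .Returns(new HttpClient(handler));
    return factoryMock.Object;
}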
public static void Main(string[] args)
{
    SeedNewPages();
    var crawlerService = new CrawlerService();
    crawlerService.Start();
}
public void ImgSearchImgUrlTest()
{
    var result = CrawlerService.ImgSearchImg("test.png");
    foreach (var keyword in result)
    {
        Console.WriteLine(keyword);
    }
}
public void CrawlBaiduImgTest()
{
    List<String> UrlList;
    CrawlerService.CrawlBaiduImg("http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=C%23", out UrlList);
    foreach (var Url in UrlList)
    {
        Console.WriteLine(Url);
    }
}
public void SearchKeywordTest()
{
    string[] keywordList = new string[] { "C#", "风景", "Donald Trump", "唐纳德·特朗普" };
    string url;
    foreach (string keyword in keywordList)
    {
        url = CrawlerService.SearchKeyword(keyword);
        Console.WriteLine(url);
    }
}
static void Main(string[] args)
{
    try
    {
        ICatalogService service = new CatalogService();
        IProviderManager manager = new ProviderManager();
        ICrawlerService crawlerService = new CrawlerService();

        if (args.Length > 0)
        {
            int providerID = 0;
            if (int.TryParse(args[0], out providerID))
            {
                Data.Provider p = crawlerService.GetProvider(providerID);
                if (p != null)
                {
                    IProvider provider = GetProviderImplementation(p, service, crawlerService);
                    if (provider != null)
                    {
                        manager.Add(provider);
                        manager.ExecuteAll();
                    }
                }
            }
        }
        else
        {
            IEnumerable<CashBack.Data.Provider> providers = crawlerService.GetActiveProviders();
            foreach (CashBack.Data.Provider p in providers)
            {
                IProvider provider = GetProviderImplementation(p, service, crawlerService);
                if (provider != null)
                {
                    if (p.LastRun.HasValue)
                    {
                        // compare total elapsed minutes so intervals longer than an hour are honored
                        TimeSpan ts = DateTime.Now.Subtract(p.LastRun.Value);
                        if (ts.TotalMinutes >= p.RunInterval)
                        {
                            manager.Add(provider);
                        }
                    }
                    else
                    {
                        manager.Add(provider);
                    }
                }
            }

            // Execute all providers
            manager.ExecuteAll();
        }
    }
    catch { }
}
public UserAlertModule(
    MangaService mangaService,
    UserAlertService userAlertService,
    CrawlerService crawlerService,
    AppDbContext context,
    LocaledResourceManager<UserAlertModuleResource> resourceManager)
{
    this.mangaService = mangaService;
    this.userAlertService = userAlertService;
    this.crawlerService = crawlerService;
    this.dbContext = context;
    this.resourceManager = resourceManager;
}
public PrivateAlertModule(
    MangaService mangaService,
    PrivateAlertService privateAlertService,
    CrawlerService crawlerService,
    AppDbContext dbContext,
    LocaledResourceManager<PrivateAlertModuleResource> resourceManager)
{
    this.mangaService = mangaService;
    this.privateAlertService = privateAlertService;
    this.crawlerService = crawlerService;
    this.dbContext = dbContext;
    this.resourceManager = resourceManager;
}
private static async Task MainAsync(string[] args)
{
    // get menu records
    var crawlerService = new CrawlerService();
    var rootRegion = await crawlerService.GetStorefrontRegionsAsync();
    var listings = await crawlerService.GetListingsOnLeafRegionsAsync(rootRegion);
    var menuItems = await crawlerService.GetMenuItems(listings);

    // save records to database
    var repository = new WmRepository();
    repository.InsertMenuItems(menuItems);
}
public SettingsViewModel(ISettingsView view, IShellService shellService, CrawlerService crawlerService,
    ExportFactory<AuthenticateViewModel> authenticateViewModelFactory)
    : base(view)
{
    ShellService = shellService;
    settings = ShellService.Settings;
    CrawlerService = crawlerService;
    this.authenticateViewModelFactory = authenticateViewModelFactory;
    this.folderBrowser = new FolderBrowserDataModel();
    this.displayFolderBrowserCommand = new DelegateCommand(DisplayFolderBrowser);
    this.authenticateCommand = new DelegateCommand(Authenticate);
    this.enableAutoDownloadCommand = new DelegateCommand(EnableAutoDownload);

    Load();
    view.Closed += ViewClosed;
    folderBrowser.PropertyChanged += FolderBrowserPropertyChanged;
}
public QueueController(IFileDialogService fileDialogService, IShellService shellService,
    IEnvironmentService environmentService, CrawlerService crawlerService,
    ISelectionService selectionService, Lazy<QueueViewModel> queueViewModel)
{
    this.fileDialogService = fileDialogService;
    this.shellService = shellService;
    this.queueViewModel = queueViewModel;
    this.environmentService = environmentService;
    this.crawlerService = crawlerService;
    this.selectionService = selectionService;
    this.removeSelectedCommand = new DelegateCommand(RemoveSelected, CanRemoveSelected);
    this.showBlogPropertiesCommand = new DelegateCommand(ShowBlogProperties);
    this.openQueueCommand = new DelegateCommand(OpenList);
    this.saveQueueCommand = new DelegateCommand(SaveList);
    this.clearQueueCommand = new DelegateCommand(ClearList);
    this.openQueuelistFileType = new FileType(Resources.Queuelist, SupportedFileTypes.QueueFileExtensions);
    this.saveQueuelistFileType = new FileType(Resources.Queuelist, SupportedFileTypes.QueueFileExtensions.First());
}
public async Task<List<string>> GetMovieTypes()
{
    List<string> types = new List<string>();
    var movieCrawler = new CrawlerService();
    movieCrawler.OnStart += (s, e) =>
    {
        Console.WriteLine($"开始抓取总页数");
    };
    movieCrawler.OnError += (s, e) =>
    {
        Console.WriteLine($"总页数抓取出错了,地址【{e.Uri}】", e.Exception);
        Thread.Sleep(1000 * 5); // pause briefly
    };
    movieCrawler.OnCompleted += (s, e) =>
    {
        var matchs = MovieTypeRegex.Matches(e.PageSource);
        for (int i = 0; i < matchs.Count; i++)
        {
            if (matchs[i].Groups.Count == 2 && !matchs[i].Groups[1].Value.IsNullOrEmpty())
            {
                types.Add(matchs[i].Groups[1].Value);
            }
            else if (matchs[i].Groups.Count == 3 && !matchs[i].Groups[1].Value.IsNullOrEmpty())
            {
                types.Add(matchs[i].Groups[1].Value);
            }
            else if (matchs[i].Groups.Count == 3 && !matchs[i].Groups[2].Value.IsNullOrEmpty())
            {
                types.Add(matchs[i].Groups[2].Value);
            }
        }
    };

    if (Website == SysMovieWebsite.ZYkuyun)
    {
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        await movieCrawler.Start(new Uri(Domain), Encoding.GetEncoding("GB2312"));
    }
    else
    {
        await movieCrawler.Start(new Uri(Domain));
    }
    return types;
}
public void Should_setup_correctly()
{
    // Selenium's concrete classes cannot be mocked, so wrapper interfaces are used instead.
    var seleniumServiceMock = new Mock<ISeleniumService>();
    var chromeDriverMock = new Mock<ICustomChromeDriver>();
    var chromeDriverMockWait = new Mock<ICustomWebDriverWait>();

    seleniumServiceMock.Setup(x => x.CreateChromeDriver())
        .Returns(chromeDriverMock.Object);
    seleniumServiceMock.Setup(x => x.CreateWebDriverWait(It.IsAny<ICustomChromeDriver>(), It.IsAny<int>()))
        .Returns(chromeDriverMockWait.Object);

    var crawlerService = new CrawlerService(seleniumServiceMock.Object);
    crawlerService.Setup();

    Assert.NotNull(crawlerService.Driver);
    Assert.NotNull(crawlerService.Wait);

    crawlerService.Quit();
}
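The Should_setup_correctly test relies on ISeleniumService, ICustomChromeDriver, and ICustomWebDriverWait precisely so the Selenium driver types can be replaced with mocks. A hedged sketch of what those wrapper abstractions might declare, inferred only from the calls made in the test (the real project likely adds members that delegate to the underlying Selenium types):

// Hypothetical wrapper interfaces inferred from the test above.
public interface ICustomChromeDriver { }

public interface ICustomWebDriverWait { }

public interface ISeleniumService
{
    ICustomChromeDriver CreateChromeDriver();
    ICustomWebDriverWait CreateWebDriverWait(ICustomChromeDriver driver, int timeoutInSeconds);
}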
static async Task Main(string[] args)
{
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Ssl3;

    var urlSite = ConfigurationManager.AppSettings["URL_SITE"].ToString();
    if (string.IsNullOrEmpty(urlSite))
    {
        Console.WriteLine("URL do site não configurado.");
        return;
    }

    var crawlerService = new CrawlerService(urlSite);
    await crawlerService.StartCrawlerAync();

    Console.WriteLine("");
    Console.WriteLine("Pressione enter para fechar o programa...");
    Console.ReadKey();
}
public static async Task<int> GetBytesAsync(string sentence)
{
    try
    {
        // Open the byte-counter page in the crawler
        page = await CrawlerService.OpenAsync("https://mothereff.in/byte-counter/");

        // Write the supplied sentence into the textarea
        await WriteSentenceAsync(sentence);

        int bytes = await GetBytesAsync();

        // Request shutdown of the browser instance
        await CrawlerService.CloseAsync();

        return bytes;
    }
    catch
    {
        return ByteCounterService.GetByteCount(sentence);
    }
}