/// <summary>
/// Reads the "uri" application setting and, when it validates, creates the scraper for it.
/// </summary>
/// <remarks>
/// Failures are written to the console and end the process: exit code 1 for an
/// <see cref="ApplicationException"/> (including the invalid-URI case raised here),
/// exit code 2 for any other exception.
/// </remarks>
public void initialize()
{
    try
    {
        string configuredUri = ConfigurationManager.AppSettings["uri"];
        bool uriIsValid = Utility.validateURI(configuredUri);

        if (!uriIsValid)
        {
            throw new ApplicationException("The URI (" + configuredUri + ") found in the configuration file is not valid.");
        }

        _scraper = new Scraper(configuredUri, _xpathPostsNode);
    }
    catch (ApplicationException appError)
    {
        Console.WriteLine("An error occurred while initializing scraping - " + appError.Message);
        System.Environment.Exit(1);
    }
    catch (Exception)
    {
        Console.WriteLine("An error occurred while initializing scraping.");
        System.Environment.Exit(2);
    }
}
// Delay value, expressed in milliseconds.
private const int delay = 1000;

/// <summary>
/// Creates the service, forwarding both arguments to the base class and keeping
/// local references to them as well.
/// </summary>
/// <param name="scraper">Scraper passed to the base class and stored in <c>_scraper</c>.</param>
/// <param name="configuration">Settings passed to the base class and stored in <c>_scrapeSettings</c>.</param>
public CvOnlineScrapeService(IScraper scraper, CvOnlineConfiguration configuration)
    : base(scraper, configuration)
{
    _scrapeSettings = configuration;
    _scraper = scraper;
}
/// <summary>
/// Creates a <c>GoogleScraper</c> with a mocked logger and loads the stored Google HTML page
/// into <c>HtmlPage</c>.
/// </summary>
public void Setup()
{
    var loggerMock = new Mock<ILogger<GoogleScraper>>();
    Scraper = new GoogleScraper(loggerMock.Object);

    // The page is read from a file path relative to the working directory.
    HtmlPage = new HtmlDocument();
    HtmlPage.Load(@"ProvidersTests\GoogleTests\GoogleHtmlPage.txt");
}
/// <summary>
/// Creates the top-post scraper around the scraper that produces the posts.
/// </summary>
/// <param name="postScraper">Scraper that yields an array of posts.</param>
public HackerNewsTopPostScraper(IScraper<Post[]> postScraper)
{
    // In practice the IScraper<Post[]> provided here is a
    // ConvertingScraper<ScrapedPostContent[], Post[]>: it uses a HackerNewsPostPageScraper
    // to read ScrapedPostContent[] from a page and a ScrapedPostContentParser to convert
    // those items into Posts.
    this._postScraper = postScraper;
}
/// <summary>
/// For every source, asks its scraper for meetings not seen before, stores each new meeting
/// together with its documents (after extracting their text) and saves the changes.
/// </summary>
/// <remarks>
/// Changes are saved after each document, and once more after each meeting so that a meeting
/// whose scraper returns no documents is still persisted.
/// </remarks>
private async Task FetchNewMeetings()
{
    foreach (Source source in await db.Sources.ToListAsync().ConfigureAwait(false))
    {
        // URLs of the meetings already stored for this source; handed to the scraper.
        ISet<string> seenMeetings = new HashSet<string>(
            await db.Meetings
                .Where(m => m.Source == source)
                .Select(m => m.Url.ToString())
                .ToListAsync()
                .ConfigureAwait(false));

        IScraper scraper = CreateScraper(source.Url);
        IEnumerable<Meeting> meetings = await scraper.FindMeetings(null, seenMeetings).ConfigureAwait(false);

        foreach (Meeting meeting in meetings)
        {
            meeting.Source = source;
            await db.Meetings.AddAsync(meeting).ConfigureAwait(false);

            IEnumerable<Document> documents = await scraper.GetDocuments(meeting).ConfigureAwait(false);
            foreach (Document document in documents)
            {
                document.Meeting = meeting;
                await GetText(document).ConfigureAwait(false);
                await db.Documents.AddAsync(document).ConfigureAwait(false);
                await db.SaveChangesAsync().ConfigureAwait(false);
            }

            // Previously the only save was inside the document loop, so a meeting without
            // documents was added to the context but never explicitly saved.
            await db.SaveChangesAsync().ConfigureAwait(false);
        }
    }
}
/// <summary>
/// Builds one scraper per dictionary entry whose URL value is recognised: entries matching
/// "amazon" get an <c>AmazonScraper</c>, otherwise entries matching "backmarket" get a
/// <c>BackMarketScraper</c>. Unrecognised entries are skipped.
/// </summary>
/// <param name="urlDic">Entries whose key and value are passed to the scraper constructors.</param>
/// <returns>The scrapers that could be created, in enumeration order.</returns>
/// <exception cref="System.InvalidOperationException">No entry was recognised.</exception>
protected static List<IScraper> InitScrapers(Dictionary<int, string> urlDic)
{
    var recognized = new List<IScraper>();

    foreach (KeyValuePair<int, string> entry in urlDic)
    {
        string url = entry.Value;

        if (Utility.findString(url, "amazon"))
        {
            recognized.Add(new AmazonScraper(entry.Key, url));
            continue;
        }

        if (Utility.findString(url, "backmarket"))
        {
            recognized.Add(new BackMarketScraper(entry.Key, url));
        }
    }

    if (recognized.Count != 0)
    {
        return recognized;
    }

    throw new System.InvalidOperationException("There is no scraper recognized in the list!");
}
/// <summary>
/// Creates a scraper of the requested type from the posted description and returns it as JSON.
/// </summary>
/// <param name="type">Scraper type name handed to the scraper service.</param>
/// <param name="s">Posted scraper description; null when the JSON body could not be bound.</param>
/// <returns>
/// The serialized scraper on success; otherwise a serialized error object, sent with
/// HTTP status 400.
/// </returns>
public string Create(string type, [FromBody] Scraper s)
{
    if (s == null)
    {
        /* The JSON sent was not in the correct format */
        return SerializeBadRequest("Invalid structure for Scraper object");
    }

    if (s.Status != "init")
    {
        return SerializeBadRequest("Initial Scraper status must be 'init'");
    }

    IScraper scraper = m_scraperService.CreateScraper(type);
    if (scraper == null)
    {
        return SerializeBadRequest(String.Format("Unknown scraper type '{0}'", type));
    }

    scraper.Status = s.Status;
    scraper.Properties = s.Properties;
    return JsonConvert.SerializeObject(scraper);
}

/// <summary>
/// Marks the response as 400 (Bad Request) and serializes an error object carrying the message.
/// Previously only the null-body case set the status code; the other error payloads were
/// returned with a success status.
/// </summary>
private string SerializeBadRequest(string message)
{
    Response.StatusCode = 400; /* Bad Request */
    var error = new LexicalAnalyzer.Models.Error();
    error.Message = message;
    return JsonConvert.SerializeObject(error);
}
/// <summary>
/// Click handler: clears the result list, creates the scraper selected in
/// <c>scrapeSelect</c> for the URL typed in <c>textBoxUrl</c>, runs it and lists the
/// path of every returned item.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button_Click(object sender, RoutedEventArgs e)
{
    listBoxResult.Items.Clear();

    if (string.IsNullOrEmpty(textBoxUrl.Text))
    {
        return;
    }

    // SelectedItem is null when nothing is selected; calling ToString() on it would throw.
    object selection = scrapeSelect.SelectedItem;
    string scraperName = selection == null ? null : selection.ToString();

    switch (scraperName)
    {
        case "Online4chanScraper":
            IMAGESCRAPER = new Online4chanScraper(textBoxUrl.Text);
            break;
        case "OnlineHTMLScraper":
            IMAGESCRAPER = new OnlineHTMLScraper(textBoxUrl.Text);
            break;
        default:
            // Drop any scraper left over from an earlier click so an unrecognised
            // selection does not silently re-run it against the old URL.
            IMAGESCRAPER = null;
            break;
    }

    if (IMAGESCRAPER == null)
    {
        return;
    }

    List<Dump> result = IMAGESCRAPER.Scrape();
    foreach (var item in result)
    {
        listBoxResult.Items.Add(item.Path);
    }
}
/// <summary>
/// Stores the collaborators of the host; the configuration is taken from the
/// <c>Value</c> of the supplied options wrapper.
/// </summary>
public AppHost(
    IScraper scraper,
    ICompare compare,
    IOptions<ScraperConfiguration> config,
    IFluentEmailFactory fluentEmail)
{
    this._fluentEmail = fluentEmail;
    this._config = config.Value;
    this._compare = compare;
    this._scraper = scraper;
}
/// <summary>
/// Creates the event data for a set of scrape results.
/// </summary>
/// <param name="eventCoreInstance">Core service instance forwarded to the base class.</param>
/// <param name="gameFileName">Value stored in <c>GameFileName</c>.</param>
/// <param name="scrapedResults">Value stored in <c>ScrapedResults</c>.</param>
/// <param name="scraper">Value stored in <c>GameScraper</c>.</param>
public GameResultsScrapedEventArgs(
    ICoreService eventCoreInstance,
    string gameFileName,
    IList<IGameScrapeResult> scrapedResults,
    IScraper scraper)
    : base(eventCoreInstance)
{
    GameFileName = gameFileName;
    ScrapedResults = scrapedResults;
    GameScraper = scraper;
}
/// <summary>
/// Creates a <c>DuckDuckGoScraper</c> with a mocked logger and loads the stored DuckDuckGo
/// HTML page into <c>HtmlPage</c>.
/// </summary>
public void Setup()
{
    var loggerMock = new Mock<ILogger<DuckDuckGoScraper>>();
    Scraper = new DuckDuckGoScraper(loggerMock.Object);

    // The page is read from a file path relative to the working directory.
    HtmlPage = new HtmlDocument();
    HtmlPage.Load(@"ProvidersTests\DuckDuckGoTests\DuckDuckGoHtmlPage.txt");
}
/// <summary>
/// Creates the client from its five collaborators, none of which may be null.
/// </summary>
/// <exception cref="ArgumentNullException">
/// Any argument is null; arguments are checked in declaration order.
/// </exception>
public DrNuClient(
    IResourceUriScraper resourceUriScraper,
    IResourceScraper resourceScraper,
    IResourceMapper resourceMapper,
    IProgramSlugScraper programSlugScraper,
    IProgramUriScraper programUriScraper)
{
    // Validate everything before touching any field.
    if (resourceUriScraper == null) throw new ArgumentNullException("resourceUriScraper");
    if (resourceScraper == null) throw new ArgumentNullException("resourceScraper");
    if (resourceMapper == null) throw new ArgumentNullException("resourceMapper");
    if (programSlugScraper == null) throw new ArgumentNullException("programSlugScraper");
    if (programUriScraper == null) throw new ArgumentNullException("programUriScraper");

    _resourceUriScraper = resourceUriScraper;
    _resourceScraper = resourceScraper;
    _resourceMapper = resourceMapper;
    _programSlugScraper = programSlugScraper;
    _programUriScraper = programUriScraper;
}
/// <summary>
/// Downloads the page for a scraping request, runs the scraper on it and saves the result.
/// </summary>
/// <param name="scrapingRequest">Request whose <c>Url</c> is downloaded.</param>
/// <param name="scraper">Scraper to run; its <c>WebsiteEncoding</c> is used for the download.</param>
/// <remarks>
/// Exceptions are logged and not rethrown. A null result is ignored, and a result that
/// carries an exception is logged instead of being saved.
/// </remarks>
private async Task HandleScrapingRequest(ScrapingRequest scrapingRequest, IScraper scraper)
{
    try
    {
        var scrapingResult = await scraper.Scrape(new ScrapingContext
        {
            ScrapingOrchestrator = this,
            ScrapingRequest = scrapingRequest,
            Html = await _downloadManager.Download(scrapingRequest.Url, scraper.WebsiteEncoding)
        });

        if (scrapingResult == null)
        {
            // No response to handle (scraper doesn't return a ScrapingResult)
            return;
        }

        if (scrapingResult.Exception != null)
        {
            // NOTE(review): assumes _logger is a Microsoft.Extensions.Logging ILogger - confirm.
            // Pass the exception as the exception argument (not as a format argument) and keep
            // its text out of the message template, where braces would be read as placeholders.
            _logger.LogError(scrapingResult.Exception, "{Message}", scrapingResult.Exception.Message);
            return;
        }

        await SaveScrapingResult(scrapingResult);
    }
    catch (Exception e)
    {
        // The stack trace travels with the exception object, so it is not formatted by hand.
        _logger.LogError(e, "{ExceptionType}: {Message}", e.GetType(), e.Message);
    }
}
/// <summary>
/// Downloads the scraper's page, lets the scraper extract the image URL from the HTML,
/// fetches that image and wraps everything in a <c>Picture</c>.
/// </summary>
/// <param name="scraper">Scraper that supplies the page URL and the image-URL extraction.</param>
/// <returns>The picture built from the scraper, the image URL and the fetched image.</returns>
/// <exception cref="HttpRequestException">The page request did not succeed.</exception>
public static async Task<IPicture> ScrapeAsync(IScraper scraper)
{
    using (var client = new HttpClient())
    using (HttpResponseMessage response = await client.GetAsync(scraper.Url))
    {
        response.EnsureSuccessStatusCode();

        // It looks like dilbert.com mucks with the charset ... intentionally?
        // Strip a trailing "lias" so the body can be decoded. Both the Content-Type header
        // and its charset may be absent, so each is checked before use.
        var contentType = response.Content.Headers.ContentType;
        string charset = contentType == null ? null : contentType.CharSet;
        if (charset != null && charset.EndsWith("lias", StringComparison.Ordinal))
        {
            contentType.CharSet = charset.Substring(0, charset.Length - "lias".Length);
        }

        string html = await response.Content.ReadAsStringAsync();
        Uri imageUrl = await scraper.GetImageUrlAsync(html);

        // be sure the URL is really an image
        IImage image = await GetImageAsync(imageUrl);

        return new Picture(scraper, imageUrl, image);
    }
}
/// <summary>
/// Stores the services the job works with: page service, scraper, failure service and logger.
/// </summary>
public ExtractDataJob(
    IPageService pageService,
    IScraper scraper,
    IFailureService failureService,
    ILogger<ExtractDataJob> logger)
{
    this._logger = logger;
    this._failureService = failureService;
    this._scraper = scraper;
    this._pageService = pageService;
}
/// <summary>
/// Writes to the console every free-market player whose three-day wage is zero, ordered by
/// ascending rating and then by descending age.
/// </summary>
/// <param name="scraper">
/// Scraper whose results are printed. Anything other than a <c>FreeMarketScraper</c>
/// produces "Method not supported".
/// </param>
public void Print(IScraper scraper)
{
    if (!(scraper is FreeMarketScraper))
    {
        Console.WriteLine("Method not supported");
        return;
    }

    var zeroWagePlayers = scraper.Results()
        .Cast<FreeMarketPlayer>()
        .Where(candidate => candidate.ThreeDaysWages == 0)
        .OrderBy(candidate => candidate.Rating)
        .ThenByDescending(candidate => candidate.Age)
        .ToList();

    if (zeroWagePlayers.Count == 0)
    {
        Console.WriteLine("No players found");
        return;
    }

    foreach (var zeroWagePlayer in zeroWagePlayers)
    {
        Console.WriteLine(zeroWagePlayer.ToString());
    }
}
/// <summary>
/// Creates a <c>BingScraper</c> with a mocked logger and loads the stored Bing HTML page
/// into <c>HtmlPage</c>.
/// </summary>
public void Setup()
{
    var loggerMock = new Mock<ILogger<BingScraper>>();
    Scraper = new BingScraper(loggerMock.Object);

    // The page is read from a file path relative to the working directory.
    HtmlPage = new HtmlDocument();
    HtmlPage.Load(@"ProvidersTests\BingTests\BingHtmlPage.txt");
}
/// <summary>
/// Creates the requester. A container is built for the given data source; the parser always
/// comes from it, and the downloader comes from it only when no scraper is supplied.
/// </summary>
/// <param name="dataSource">Data source used to build the container.</param>
/// <param name="scraper">Optional scraper to use as downloader instead of the container's.</param>
public FrameDataRequester(FrameDataSource dataSource = FrameDataSource.Default, IScraper scraper = null)
{
    var container = new IocContainer(dataSource).Container;

    if (scraper != null)
    {
        _downloader = scraper;
    }
    else
    {
        _downloader = container.GetInstance<IScraper>();
    }

    _parser = container.GetInstance<IParser>();
}
/// <summary>
/// Clears the console and, depending on the pressed key, announces and runs one of the
/// player printers on the scraper.
/// </summary>
/// <param name="keyInfo">Pressed key: '1', '2' or '3' pick a printer, 'x' exits, anything else is rejected.</param>
/// <param name="scraper">Scraper handed to the chosen printer.</param>
private void SelectPrinter(ConsoleKeyInfo keyInfo, IScraper scraper)
{
    Console.Clear();

    // Stays null for the choices that do not print anything.
    IPlayerPrinter chosenPrinter = null;

    switch (keyInfo.KeyChar)
    {
        case '1':
            Console.WriteLine("Zero wage selected");
            chosenPrinter = new ZeroWagePrinter();
            break;
        case '2':
            Console.WriteLine("Highest rating selected");
            chosenPrinter = new HighestRatingPrinter();
            break;
        case '3':
            Console.WriteLine("Highest value selected");
            chosenPrinter = new HighestValuePrinter();
            break;
        case 'x':
            Console.WriteLine("Exiting to action menu...");
            break;
        default:
            Console.WriteLine("Option not supported.");
            break;
    }

    if (chosenPrinter != null)
    {
        chosenPrinter.Print(scraper);
    }
}
/// <summary>
/// Creates the view model for one chapter download: validates the download info, looks up
/// the scraper the chapter record refers to, hooks the downloader events, sets the initial
/// state and creates the commands.
/// </summary>
/// <param name="downloadInfo">Download description; must carry a chapter record and a manga record, both with non-empty ids.</param>
/// <param name="downloadSemaphore">Semaphore stored for later use by the view model.</param>
/// <exception cref="ArgumentNullException">An argument is null.</exception>
/// <exception cref="ArgumentException">The chapter or manga record, or one of their ids, is missing.</exception>
public DownloadViewModel(DownloadedChapterInfo downloadInfo, ISemaphore downloadSemaphore)
{
    if (downloadInfo == null)
    {
        throw new ArgumentNullException("downloadInfo");
    }

    if (downloadSemaphore == null)
    {
        throw new ArgumentNullException("downloadSemaphore");
    }

    var chapter = downloadInfo.ChapterRecord;
    if (chapter == null)
    {
        throw new ArgumentException("Chapter record is invalid.", "downloadInfo");
    }

    if (String.IsNullOrEmpty(chapter.ChapterId))
    {
        throw new ArgumentException("Chapter record id is invalid.", "downloadInfo");
    }

    var manga = chapter.MangaRecord;
    if (manga == null)
    {
        throw new ArgumentException("Manga record is invalid.", "downloadInfo");
    }

    if (String.IsNullOrEmpty(manga.MangaId))
    {
        throw new ArgumentException("Manga record id is invalid.", "downloadInfo");
    }

    _downloadInfo = downloadInfo;
    _downloadSemaphore = downloadSemaphore;

    // The scraper is found by its guid; no match leaves both scraper and downloader null.
    _scraper = ScraperLoader.Instance.AllScrapers.FirstOrDefault(
        candidate => candidate.ScraperGuid == downloadInfo.ChapterRecord.Scraper);

    if (_scraper != null)
    {
        _downloader = _scraper.GetDownloader();

        // register downloader events
        _downloader.DownloadProgress += _downloader_DownloadProgress;
        _downloader.DownloadCompleted += _downloader_DownloadCompleted;
    }

    // A stored path means the file was already downloaded; otherwise the download starts now.
    bool alreadyDownloaded = !String.IsNullOrEmpty(_downloadInfo.Path);
    State = alreadyDownloaded ? DownloadState.Unknown : DownloadState.Ok;
    Completed = alreadyDownloaded;

    CurrentActionText = String.Empty;

    _cancelDownloadCommand = new RelayCommand(Cancel, x => !Completed);
    _removeDownloadCommand = new RelayCommand(Remove);
    _openDownloadCommand = new RelayCommand(Open, x => DownloadExists);
    _retryDownloadCommand = new RelayCommand(
        RetryDownload,
        x => _downloader != null
             && Completed
             && !DownloadExists
             && !String.IsNullOrEmpty(_downloadInfo.DownloadFolder));

    CancelText = ButtonCancelText;
}
/// <summary>
/// Creates the controller: stores the scraper, builds the S3 client from the configured
/// credentials (region EU West 2) and reads the bucket and width defaults from configuration.
/// </summary>
/// <param name="config">
/// Configuration providing "awsAccessKeyId", "awsSecretAccessKey", "defaultBucketName",
/// "defaultWidth" and the semicolon-separated "validBucketNames".
/// </param>
/// <param name="scraper">Scraper stored for later use.</param>
public PhotoController(IConfiguration config, IScraper scraper)
{
    _scraper = scraper;

    string accessKeyId = config.GetValue<string>("awsAccessKeyId");
    string secretAccessKey = config.GetValue<string>("awsSecretAccessKey");
    _s3Client = new AmazonS3Client(accessKeyId, secretAccessKey, RegionEndpoint.EUWest2);

    _defaultBucketName = config.GetValue<string>("defaultBucketName");
    _defaultWidth = config.GetValue<int>("defaultWidth");

    string bucketNameList = config.GetValue<string>("validBucketNames");
    _validBucketNames = bucketNameList.Split(';');
}
/// <summary>
/// Finds every non-abstract type in this type's assembly that is assignable to
/// <c>ScraperTest&lt;T&gt;</c> and creates one instance of each, resolving constructor
/// dependencies from the scraper's services and passing the scraper itself.
/// </summary>
/// <param name="scraper">Scraper whose <c>Services</c> are used for activation.</param>
public ScraperTestManager(IScraper scraper)
{
    var testBaseType = typeof(ScraperTest<T>);
    var assemblyTypes = GetType().Assembly.GetTypes();

    var concreteTestTypes = assemblyTypes.Where(
        candidate => !candidate.IsAbstract && testBaseType.IsAssignableFrom(candidate));

    Tests = concreteTestTypes
        .Select(testType => (ScraperTest<T>)ActivatorUtilities.CreateInstance(scraper.Services, testType, scraper))
        .ToArray();
}
/// <summary>
/// Stores the collaborators of the recorder: scraper, serializer, storage client,
/// unique client and upload status recorder.
/// </summary>
public ScrapeResultRecorder(
    IScraper scraper,
    IScrapeResultSerializer serializer,
    IStorageClient storageClient,
    IUniqueClient uniqueClient,
    IUploadStatusRecorder statusRecorder)
{
    this._statusRecorder = statusRecorder;
    this._uniqueClient = uniqueClient;
    this._storageClient = storageClient;
    this._serializer = serializer;
    this._scraper = scraper;
}
/// <summary>
/// Stores the scraper and the two repositories, then builds the exchange mapping.
/// </summary>
public MorningstarService(
    IScraper scraper,
    ISecurityRepository securityRepository,
    ICategoryRepository categoryRepository)
{
    this._scraper = scraper;
    this._securityRepository = securityRepository;
    this._categoryRepository = categoryRepository;

    // Built last, after all injected dependencies are in place.
    this._exchangeMapping = InitializeExchangeMapping();
}
/// <summary>
/// Adds a scraper to the tracked list unless it is already there, then saves the list.
/// </summary>
/// <param name="scraper">Scraper instance to track.</param>
public void Register(IScraper scraper)
{
    bool alreadyTracked = scrapers.Contains(scraper);
    if (alreadyTracked == false)
    {
        scrapers.Add(scraper);
    }

    // The list is saved on every call, whether or not it changed.
    SaveScrapers();
}
/// <summary>
/// Wraps a manga record and looks up the loaded scraper whose guid matches the record's.
/// </summary>
/// <param name="mangaRecord">Record to wrap; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="mangaRecord"/> is null.</exception>
public RecentMangaRecord(IMangaRecord mangaRecord)
{
    if (mangaRecord == null)
    {
        throw new ArgumentNullException("mangaRecord");
    }

    _mangaRecord = mangaRecord;

    // No matching scraper leaves _scraper null.
    _scraper = ScraperLoader.Instance.AllScrapers
        .FirstOrDefault(candidate => candidate.ScraperGuid == _mangaRecord.Scraper);
}
/// <summary>
/// Initializes the component, then creates the scraper, proxy and export helpers and a
/// cancellation token source together with its token.
/// </summary>
public Main()
{
    InitializeComponent();

    this._scraper = new MyTwitterScraper();
    this._proxy = new Proxy();
    this._export = new Export();

    // The token is taken from the source created on the previous line.
    this.cts = new CancellationTokenSource();
    this.ct = this.cts.Token;
}
/// <summary>
/// Creates the CvBankas data service: asks the scraper factory for the analyser and the
/// scraper of that portal, creates the HTTP client named after the portal's description and
/// combines client and scraper into a scrape client.
/// </summary>
public CvBankasDataService(
    IHttpClientFactory httpClientFactory,
    IScraperFactory scraperFactory,
    IUnitOfWork unitOfWork)
{
    var portal = JobPortals.CvBankas;

    _unitOfWork = unitOfWork;
    _analyser = scraperFactory.BuildAnalyser(portal);
    _scraper = scraperFactory.BuildScraper(portal);

    string clientName = portal.GetDescription();
    _httpClient = httpClientFactory.CreateClient(clientName);
    _scrapeClient = new ScrapeClient(_httpClient, _scraper);
}
/// <summary>
/// Resolves the scraper from the application container, initializes the component and
/// shows all scrape reports fetched from the scraper in <c>LvReports</c>.
/// </summary>
public Reports()
{
    _scraper = App.Container.Resolve<IScraper>();

    InitializeComponent();

    // The reports are fetched only after the component has been initialized.
    LvReports.ItemsSource = _scraper.FetchAllScrapeReports();
}
/// <summary>
/// Pairs a scraper with its enabled flag.
/// </summary>
/// <param name="scraper">Scraper to describe; must not be null.</param>
/// <param name="enabled">Initial value of <c>Enabled</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="scraper"/> is null.</exception>
public ScraperInfo(IScraper scraper, bool enabled)
{
    if (scraper == null) throw new ArgumentNullException("scraper");

    this._scraper = scraper;
    this.Enabled = enabled;
}
/// <summary>
/// Stores the dependencies and prepares a one-shot timer whose interval is the configured
/// "ScanningInterval" and whose elapsed handler is <c>ScanningLoop</c>. The timer is not
/// started here.
/// </summary>
public BundleNotifierService(IConfiguration config, IWebhookSenderService sender, IScraper scraper)
{
    _config = config;
    _sender = sender;
    _scraper = scraper;

    int scanningInterval = _config.GetValue<int>("ScanningInterval");

    // AutoReset is off, so the timer raises Elapsed once per start.
    _timer = new System.Timers.Timer(scanningInterval) { AutoReset = false };
    _timer.Elapsed += ScanningLoop;
}
/// <summary>
/// Stores the requester and the three scrapers used for movies: search, info and download.
/// </summary>
public MoviePerformer(
    IRequester requester,
    IScraper<IEnumerable<Uri>> movieSearchScraper,
    IScraper<MTReleaseInfo> movieInfoScraper,
    IScraper<Uri> movieDownloadScraper)
{
    // "this." is required: every field shares its name with a parameter.
    this.movieDownloadScraper = movieDownloadScraper;
    this.movieInfoScraper = movieInfoScraper;
    this.movieSearchScraper = movieSearchScraper;
    this.requester = requester;
}
/// <summary>
/// Stores the requester, the RSS and season scrapers and the download generator.
/// </summary>
public RssPerformer(
    IRequester requester,
    IScraper<IEnumerable<KeyValuePair<MTReleaseInfo, Uri>>> rssScraper,
    IScraper<IEnumerable<MTReleaseInfo>> seasonScraper,
    IDownloadGenerator downloadGenerator)
{
    // "this." is required: every field shares its name with a parameter.
    this.downloadGenerator = downloadGenerator;
    this.seasonScraper = seasonScraper;
    this.rssScraper = rssScraper;
    this.requester = requester;
}
/// <summary>
/// Stores the requester, the TV-show and season scrapers and the download generator.
/// </summary>
public TvShowPerformer(
    IRequester requester,
    IScraper<IEnumerable<Season>> tvShowScraper,
    IScraper<IEnumerable<MTReleaseInfo>> seasonScraper,
    IDownloadGenerator downloadGenerator)
{
    // "this." is required: every field shares its name with a parameter.
    this.downloadGenerator = downloadGenerator;
    this.seasonScraper = seasonScraper;
    this.tvShowScraper = tvShowScraper;
    this.requester = requester;
}
/// <summary>
/// Item with a tag and two byte-array payloads.
/// NOTE(review): "Resim" presumably means "picture", so these are likely image bytes - confirm.
/// </summary>
public class List1
{
    public string Tag { get; set; }

    public byte[] Resim1 { get; set; }

    public byte[] Resim2 { get; set; }
}

/// <summary>
/// Item with a tag and a single byte-array payload.
/// </summary>
public class List2
{
    public string Tag { get; set; }

    public byte[] Resim { get; set; }
}

/// <summary>
/// Resolves the scraper from the application container, subscribes to its started, ended
/// and resource-scraped events and then initializes the component.
/// </summary>
public MainWindow()
{
    var resolvedScraper = App.Container.Resolve<IScraper>();
    _scraper = resolvedScraper;

    // Handlers are attached before the component is initialized.
    resolvedScraper.ScrapeStarted += ScraperOnScrapeStarted;
    resolvedScraper.ScrapeEnded += ScraperOnScrapeEnded;
    resolvedScraper.ResourceScraped += ScraperOnResourceScraped;

    InitializeComponent();
}
/// <summary>
/// Wraps a manga record and looks up the loaded scraper whose guid matches the record's.
/// </summary>
/// <param name="mangaRecord">Record to wrap; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="mangaRecord"/> is null.</exception>
public RecentMangaRecord(IMangaRecord mangaRecord)
{
    if (mangaRecord == null) throw new ArgumentNullException("mangaRecord");

    this._mangaRecord = mangaRecord;

    // No matching scraper leaves _scraper null.
    var loadedScrapers = ScraperLoader.Instance.AllScrapers;
    this._scraper = loadedScrapers.FirstOrDefault(loaded => loaded.ScraperGuid == this._mangaRecord.Scraper);
}
/// <summary>
/// Creates the client from its five collaborators, none of which may be null.
/// </summary>
/// <exception cref="ArgumentNullException">
/// Any argument is null; arguments are checked in declaration order.
/// </exception>
public DrNuClient(
    IResourceUriScraper resourceUriScraper,
    IResourceScraper resourceScraper,
    IResourceMapper resourceMapper,
    IProgramSlugScraper programSlugScraper,
    IProgramUriScraper programUriScraper)
{
    if (resourceUriScraper == null)
    {
        throw new ArgumentNullException("resourceUriScraper");
    }

    if (resourceScraper == null)
    {
        throw new ArgumentNullException("resourceScraper");
    }

    if (resourceMapper == null)
    {
        throw new ArgumentNullException("resourceMapper");
    }

    if (programSlugScraper == null)
    {
        throw new ArgumentNullException("programSlugScraper");
    }

    if (programUriScraper == null)
    {
        throw new ArgumentNullException("programUriScraper");
    }

    _resourceUriScraper = resourceUriScraper;
    _resourceScraper = resourceScraper;
    _resourceMapper = resourceMapper;
    _programSlugScraper = programSlugScraper;
    _programUriScraper = programUriScraper;
}
/// <summary>
/// Creates the search service from the scrapers and the aggregator.
/// </summary>
/// <exception cref="ArgumentNullException">
/// <paramref name="scrapers"/> or <paramref name="aggregator"/> is null; scrapers are checked first.
/// </exception>
public SearchService(IScraper[] scrapers, IAggregator aggregator)
{
    if (scrapers == null) throw new ArgumentNullException("scrapers");
    if (aggregator == null) throw new ArgumentNullException("aggregator");

    _scrapers = scrapers;
    _aggregator = aggregator;
}
/// <summary>
/// Creates the price feed and prepares its starting URLs: the livecharts.co.uk share map
/// page for "0-9" followed by one page per letter from "a" to "z".
/// </summary>
/// <param name="scraper">Scraper stored for later use; must not be null.</param>
public LiveCharts_co_uk_PriceFeed(IScraper scraper)
{
    Condition.Requires(scraper).IsNotNull();
    this._scraper = scraper;

    #region Set starting urls
    // The 27 URLs differ only in the trailing "letter" value, so they are generated rather
    // than spelled out one by one. Contents and order match the former literal list.
    const string shareMapUrl = "http://www.livecharts.co.uk/share_map.php?letter=";

    var letters = Enumerable.Range('a', 26).Select(code => ((char)code).ToString());

    this._listUrls = new[] { "0-9" }
        .Concat(letters)
        .Select(letter => shareMapUrl + letter)
        .ToList<string>();
    #endregion
}
/// <summary>
/// Creates a fresh test view and a presenter bound to it.
/// </summary>
public void TestInitialize()
{
    var view = new TestScraperView();
    _view = view;

    // The presenter receives the value stored in the field, as before.
    _presenter = new ImageScraperPresenter(_view);
}
/// <summary>
/// Pairs a scraper with its enabled flag.
/// </summary>
/// <param name="scraper">Scraper to describe; must not be null.</param>
/// <param name="enabled">Initial value of <c>Enabled</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="scraper"/> is null.</exception>
public ScraperInfo(IScraper scraper, bool enabled)
{
    if (scraper == null)
    {
        throw new ArgumentNullException("scraper");
    }

    _scraper = scraper;
    Enabled = enabled;
}
/// <summary>
/// Queues a preview request for scrapers that implement <c>IPreview</c> and, when the
/// request finishes without error, replaces <c>Mangas</c> with the returned records.
/// </summary>
/// <param name="scraper">Scraper to preview; ignored when it does not implement <c>IPreview</c>.</param>
private void LoadScraperPreview(IScraper scraper)
{
    var previewSource = scraper as IPreview;
    if (previewSource == null)
    {
        return;
    }

    _requestQueue.Add(
        () => previewSource.Preview(),
        (r, e) =>
        {
            // Check the cast result, not the raw result: a value of another type would
            // otherwise reach the collection constructor as null.
            var records = r as IEnumerable<IMangaRecord>;
            if (e != null || records == null)
            {
                return;
            }

            lock (_syncRoot)
            {
                // just replace collection -> this is easier than removing and then adding records
                Mangas = new AsyncObservableCollection<IMangaRecord>(records);
                OnPropertyChanged(() => Mangas);
            }
        });
}
/// <summary>
/// Runs the scraper via <c>Task.Run</c>. Progress values and the distinct scraped tracks
/// are handed to the UI through the dispatcher helper.
/// </summary>
/// <param name="scraper">Scraper to run.</param>
private async Task ScrapAsync(IScraper scraper)
{
    Action scrapeWork = () =>
    {
        // The progress reporter is created inside the background work, as before.
        var progressReporter = new Progress<double>(value =>
        {
            DispatcherHelper.CheckBeginInvokeOnUI(() => this.ScrapProgress = value);
        });

        var distinctTracks = scraper.Scrap(progressReporter).Distinct(new TrackComparer());

        foreach (var scrapedTrack in distinctTracks)
        {
            DispatcherHelper.CheckBeginInvokeOnUI(() => this.ScrapedTracks.Add(scrapedTrack));
        }
    };

    await Task.Run(scrapeWork);
}
/// <summary>
/// Creates an exception whose message names the symbol that failed and the provider name
/// of the scraper that was used.
/// </summary>
/// <param name="symbol">Symbol that was being looked up.</param>
/// <param name="scraper">Scraper whose <c>ProviderName</c> goes into the message.</param>
/// <param name="innerException">Underlying cause, forwarded to the other constructor.</param>
public ScraperException(TickerSymbol symbol, IScraper scraper, Exception innerException)
    : this(
        String.Format("Error looking up symbol {0} from {1}.", symbol, scraper.ProviderName),
        innerException)
{
    // Nothing else to do: message and inner exception are handled by the chained constructor.
}
/// <summary>
/// Loads the scraper's mangas and keeps those whose name contains the given text,
/// ignoring case.
/// </summary>
/// <param name="scraper">Scraper that supplies the mangas.</param>
/// <param name="value">Text to look for in each manga name; must not be null.</param>
/// <returns>The matching mangas, in the order the scraper returned them.</returns>
private async Task<List<IManga>> GetMangas(IScraper scraper, string value)
{
    var mangas = await scraper.Mangas();

    // A case-insensitive ordinal search avoids lower-casing both strings for every element.
    return mangas
        .Where(manga => manga.Name.IndexOf(value, StringComparison.OrdinalIgnoreCase) >= 0)
        .ToList();
}
/// <summary>
/// Obtains the <c>ScrapeHub</c> hub context, resolves the scraper from the application
/// container and gets the logger for this class.
/// </summary>
public TestController()
{
    var connectionManager = GlobalHost.ConnectionManager;
    _scrapeHub = connectionManager.GetHubContext<ScrapeHub>();

    var container = MvcApplication.Container;
    _scraper = container.Resolve<IScraper>();

    _log = LogManager.GetCurrentClassLogger();
}
/// <summary>
/// Creates the event data for scraped game info.
/// </summary>
/// <param name="eventCoreInstance">Core service instance forwarded to the base class.</param>
/// <param name="gameInfo">Game info forwarded to the base class.</param>
/// <param name="gameScraper">Value stored in <c>GameScraper</c>.</param>
public GameInfoScrapedEventArgs(
    ICoreService eventCoreInstance,
    IGameInfo gameInfo,
    IScraper gameScraper)
    : base(eventCoreInstance, gameInfo)
{
    GameScraper = gameScraper;
}
/// <summary>
/// Creates the client from its announcers and scrapers; both arrays are stored as given.
/// </summary>
/// <param name="announcers">Value stored in <c>Announcers</c>.</param>
/// <param name="scrapers">Value stored in <c>Scrapers</c>.</param>
public TrackerClient(IAnnouncer[] announcers, IScraper[] scrapers)
{
    this.Announcers = announcers;
    this.Scrapers = scrapers;
}
/// <summary>
/// Creates the validator around the scraper it works with.
/// </summary>
/// <param name="scraper">Scraper stored as given; it is not checked for null.</param>
public CrawlerValidator(IScraper scraper)
{
    this._scraper = scraper;
}
/// <summary>
/// Creates a picture from a scraper: name, display name, rating and categories are taken
/// from the scraper and passed to the other constructor, then URL and image are stored.
/// </summary>
/// <param name="scraper">Scraper that supplies the descriptive values.</param>
/// <param name="url">URL stored for the picture.</param>
/// <param name="image">Image stored for the picture.</param>
internal Picture(IScraper scraper, Uri url, IImage image)
    : this(scraper.Name, scraper.DisplayName, scraper.Rating, scraper.Categories)
{
    // "this." is required: both fields share their names with parameters.
    this.image = image;
    this.url = url;
}
/// <summary>
/// Creates the runner for a scraper and gives it a new highlighter.
/// </summary>
/// <param name="scraper">Scraper to run; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="scraper"/> is null.</exception>
public ScraperRunner(IScraper scraper)
{
    if (scraper == null)
    {
        throw new ArgumentNullException("scraper");
    }

    this.scraper = scraper;
    this.highlighter = new Highligher();
}
/// <summary>
/// Creates the presenter for the given view.
/// </summary>
/// <param name="view">View stored as given; note that it is typed as <c>IScraper</c>.</param>
public ImageScraperPresenter(IScraper view)
{
    this._view = view;
}
/// <summary>
/// Creates the runner around the scraper it works with.
/// </summary>
/// <param name="scraper">Scraper stored as given; it is not checked for null.</param>
public CrawlWebRunner(IScraper scraper)
{
    this._scraper = scraper;
}