static void Main(string[] args)
{
    ScraperService scraperService = new ScraperService();

    using (var client = new HttpClient())
    {
        // "Hey, look at this HTML page, and check out this table!"
        var html = client.GetStreamAsync("http://www.espn.com/esports/story/_/id/21152905/college-esports-list-varsity-esports-programs-north-america").Result;
        var parser = new HtmlParser();
        var document = parser.Parse(html);
        var tableRows = document.QuerySelectorAll("table.inline-table tr.last");
        List<School_Scraper> results = new List<School_Scraper>();

        // Loop through the table to scrape data
        foreach (var tr in tableRows)
        {
            var name = tr.QuerySelector("td");
            var state = tr.QuerySelector("td:nth-child(2)");
            var athletics = tr.QuerySelector("td:nth-child(3)");

            var school = new School_Scraper();
            school.Name = name.TextContent;
            school.State = state.TextContent;
            school.Athletics = athletics.TextContent;
            results.Add(school);

            // Give the school object to the service one at a time
            scraperService.Post(school);
        }

        // Look at the list of schools we just scraped!
        Console.WriteLine(JsonConvert.SerializeObject(results));
    } // calls client.Dispose()
}
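// A minimal async sketch of the same scrape, avoiding the blocking .Result call
// above. It assumes the same AngleSharp HtmlParser and the same
// School_Scraper/ScraperService types; GetStringAsync stands in for GetStreamAsync.
static async Task Main(string[] args)
{
    var scraperService = new ScraperService();
    using (var client = new HttpClient())
    {
        var html = await client.GetStringAsync("http://www.espn.com/esports/story/_/id/21152905/college-esports-list-varsity-esports-programs-north-america");
        var document = new HtmlParser().Parse(html);

        foreach (var tr in document.QuerySelectorAll("table.inline-table tr.last"))
        {
            var school = new School_Scraper
            {
                Name = tr.QuerySelector("td")?.TextContent,
                State = tr.QuerySelector("td:nth-child(2)")?.TextContent,
                Athletics = tr.QuerySelector("td:nth-child(3)")?.TextContent
            };
            scraperService.Post(school);
        }
    }
}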
/// <summary>
/// Constructor for the shell view model.
/// </summary>
public ShellViewModel()
{
    ProgressCircleVisibility = Visibility.Collapsed;
    scraper = new ScraperService();
    System.Net.ServicePointManager.DefaultConnectionLimit = 5;
    GetServerData();
}
/// <summary>
/// Start of program.
/// </summary>
/// <returns>A task that completes when the scrape run finishes.</returns>
private static async Task Main()
{
    var service = new ScraperService();
    var result = await service.GetFromWebAsync();
    TimeSpan runTime = result.EndTime - result.StartTime;
    Console.WriteLine($"Run time of {runTime}");
}
// GET: Obras
public ActionResult Procesar()
{
    Usuario usuario = new Usuario();
    ScraperService s = new ScraperService(usuario);
    s.ProcesarDocumento("D:/Code/SGO/Proyecto/Archivos/Editando1.xls", 3, 4);
    //return View(await _context.Obra.ToListAsync());
    return View();
}
/// <summary>
/// The main entry point for the application.
/// </summary>
public static void Main(string[] args)
{
    var service = new ScraperService();

#if DEBUG
    service.RunAsConsole(args);
#else
    ServiceBase.Run(new ServiceBase[] { service });
#endif
}
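// RunAsConsole is not shown here; a plausible sketch, assuming ScraperService
// derives from ServiceBase, is to call the protected OnStart/OnStop directly
// so the same code path can be exercised under a debugger:
public void RunAsConsole(string[] args)
{
    OnStart(args);                                  // kick off the service work
    Console.WriteLine("Running... press Enter to stop.");
    Console.ReadLine();
    OnStop();                                       // same shutdown path the SCM uses
}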
public async Task scrap_async_should_invoke_get_all_async_on_scan_page_service()
{
    // Arrange
    var scrapService = new ScraperService(_scanPageServiceMock.Object, _adRepository.Object);

    // Act
    await scrapService.ScrapAsync();

    // Assert
    _scanPageServiceMock.Verify(x => x.GetAllAsync(), Times.Once);
}
public HttpResponseMessage Scrape(int week)
{
    var context = new TimeTableContext(WebConfigurationManager.AppSettings["DbConnectionString"]);
    var scraperRepository = new ScraperRepository(context);
    var classroomRepository = new ClassroomRepository(context);
    var bookingRepository = new BookingRepository(context);
    var classRepository = new ClassRepository(context);
    var scraperService = new ScraperService(scraperRepository, classroomRepository, classRepository, bookingRepository);

    // Fire and forget: the scrape runs in the background while the request returns immediately.
    Task.Run(() => scraperService.Scrape(week));

    return Request.CreateResponse(HttpStatusCode.OK);
}
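// Caveat: inside ASP.NET (System.Web), a bare Task.Run can be torn down by an
// app-pool recycle mid-scrape. A sketch of the same call routed through
// HostingEnvironment.QueueBackgroundWorkItem (System.Web.Hosting, .NET 4.5.2+),
// which at least registers the work with the runtime:
HostingEnvironment.QueueBackgroundWorkItem(ct => scraperService.Scrape(week));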
public HttpResponseMessage GetAll()
{
    try
    {
        ItemsResponse<TestDatesModel> resp = new ItemsResponse<TestDatesModel>();
        ScraperService svc = new ScraperService();
        resp.Items = svc.GetAll();
        return Request.CreateResponse(HttpStatusCode.OK, resp);
    }
    catch (Exception e)
    {
        return Request.CreateErrorResponse(HttpStatusCode.BadRequest, e.Message);
    }
}
public void TestScraperInstagram()
{
    var clientFactory = new Mock<IHttpClientFactory>();
    var repo = new Mock<IApplicationRepository>();
    repo.Setup(x => x.AddDetails(
            It.IsAny<Application>(),
            It.IsAny<ApplicationDetails>()))
        .Verifiable();

    IScrapperService scrapper = new ScraperService(repo.Object, clientFactory.Object);
    scrapper.ParseApplication(_application);
}
static async Task Main(string[] args)
{
    var services = new ServiceCollection()
        .AddLogging(loggingBuilder =>
        {
            // Configure logging with NLog
            loggingBuilder.ClearProviders();
            loggingBuilder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Trace);
            loggingBuilder.AddNLog(Path.Combine(Environment.CurrentDirectory, "nlog.config"));
        });

    NativeInjectorBootStrapper.RegisterServices(services);
    var serviceProvider = services.BuildServiceProvider();

    var logger = serviceProvider.GetService<ILogger<Program>>();
    var genreRepository = serviceProvider.GetService<IGenreRepository>();
    var appDataRepository = serviceProvider.GetService<IAppDataRepository>();
    var searchAppRepository = serviceProvider.GetService<ISearchAppRepository>();
    var uOW = serviceProvider.GetService<IUnitOfWork>();

    // Apply pending EF Core migrations before scraping
    serviceProvider.GetRequiredService<SteamGameParseContext>().Database.Migrate();

    logger.LogInformation("Starting application");

    var service = new ScraperService(logger, genreRepository, appDataRepository, searchAppRepository, uOW);

    logger.LogInformation("start app search");
    // await service.SearchGamesAsync();

    // Scrape only apps that have not been processed yet (capped at Id < 15)
    var games = searchAppRepository.GetAll().Where(x => !x.ExecutionResult && x.Id < 15).ToList();

    foreach (var game in games)
    {
        logger.LogInformation($"Scraping app {game.Name}");
        await service.ScrapeAsync(game.Appid);

        // Mark the app as processed and persist the result
        var searchGame = searchAppRepository.GetByAppId(game.Appid);
        searchGame.UploadResult(true);
        searchAppRepository.SaveChanges();
        uOW.Commit();
    }

    logger.LogInformation("Scraping done");
}
public HttpResponseMessage GetAll()
{
    try
    {
        ItemResponse<ScraperList> resp = new ItemResponse<ScraperList>();
        ScraperService svc = new ScraperService();
        resp.Item = svc.GetAll();
        return Request.CreateResponse(HttpStatusCode.OK, resp);
    }
    catch (Exception ex)
    {
        return Request.CreateErrorResponse(HttpStatusCode.BadRequest, ex.Message);
    }
}
public void Success_ScraperTests(string testName, int callCount, string existingShowUpdateTime)
{
    Mock<HttpMessageHandler> handlerMock = MockHandler(testName);
    Mock<IShowService> showServiceMock = new Mock<IShowService>();
    Mock<ILogger<ScraperService>> loggerMock = new Mock<ILogger<ScraperService>>();

    if (!existingShowUpdateTime.IsNullOrEmpty())
    {
        showServiceMock.Setup(ss => ss.GetShowByShowId(It.IsAny<int>()))
            .Returns(new Show { LastUpdateTime = DateTime.Parse(existingShowUpdateTime) });
    }

    var httpClient = new HttpClient(handlerMock.Object);
    var service = new ScraperService(httpClient, showServiceMock.Object, loggerMock.Object, configuration, new ShowParser());

    service.DoWork(null);

    showServiceMock.Verify(ss => ss.Upsert(It.IsAny<Show>()), Times.Exactly(callCount));
}
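// MockHandler is not shown in this snippet; a plausible sketch using Moq's
// Protected() API (requires `using Moq.Protected;`) that serves a canned
// response per test case. The fixture-file path convention is an assumption.
private static Mock<HttpMessageHandler> MockHandler(string testName)
{
    var handlerMock = new Mock<HttpMessageHandler>();
    handlerMock.Protected()
        .Setup<Task<HttpResponseMessage>>(
            "SendAsync",
            ItExpr.IsAny<HttpRequestMessage>(),
            ItExpr.IsAny<CancellationToken>())
        .ReturnsAsync(new HttpResponseMessage(HttpStatusCode.OK)
        {
            // Canned payload loaded per test case (hypothetical path)
            Content = new StringContent(File.ReadAllText($"Fixtures/{testName}.json"))
        });
    return handlerMock;
}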
private async Task ResumeAfterRateIsSet(IDialogContext context, IAwaitable<long> argument)
{
    rating = (int)await argument;
    string message = GetMessageAfterRatingSet(rating);
    await context.SayAsync(message);

    var scraper = new ScraperService();
    string scrapedText = await scraper.ScrapAsync(link);

    // Split the scraped text into chunks the analytics API can accept
    TextSplitter splitter = new TextSplitter(scrapedText.Trim());
    string[] split = splitter.Split(MaxDocumentLength);

    var textAnalytics = new TextAnalyticsApiService();
    var keywords = await textAnalytics.GetKeywordsAsync(split);

    await context.PostAsync($"Keywords: {String.Join(",", keywords)}");
    context.Done<object>(null);
}
public ScrapRegistry(IScanPageService scanPageService, IAdRepository adRepository)
{
    _scraperService = new ScraperService(scanPageService, adRepository);
    Logger.Debug("Execute Scraper Task!");

    // Run once immediately, then every 15 minutes
    Schedule(async () => await Execute()).WithName("Scrap").ToRunNow().AndEvery(15).Minutes();
}
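// If this is a FluentScheduler Registry (Schedule/ToRunNow/AndEvery suggest so),
// it is activated at startup with a single call. The concrete ScanPageService
// and AdRepository constructors here are assumptions:
JobManager.Initialize(new ScrapRegistry(new ScanPageService(), new AdRepository()));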
public HomeController(ScraperService scraperService)
{
    _scraperService = scraperService;
}

public DevToolsController(ScraperService scraperService)
{
    _scraperService = scraperService;
}

public ScraperController()
{
    ScraperService = new ScraperService();
}

public ScraperController(ScraperContext ctx)
{
    Ctx = ctx;
    scraperService = new ScraperService(Ctx);
    sessionRepo = new SessionRepo(Ctx);
}

public ScraperController(ScraperService scraper)
{
    _scraper = scraper;
}
static void Main(string[] args)
{
    List<JobPosting> jobs = new List<JobPosting>();
    string initialUrl = "https://www.linkedin.com/jobs/search?keywords=Software+Developer&distance=15&locationId=PLACES%2Eus%2E7-1-0-19-99&f_TP=1%2C2&f_E=3%2C2&orig=FCTD&trk=jobs_jserp_facet_exp";

    ChromeOptions options = new ChromeOptions();
    options.AddArgument("--headless");
    options.AddArgument("--incognito");
    options.AddArgument("--ignore-certificate-errors");
    IWebDriver chromeDriver = new ChromeDriver(options);

    int start = 1;
    string pageRange = "&start=" + start + "&count=50";
    string initialRange = initialUrl + pageRange;
    chromeDriver.Url = initialRange;

    var html = chromeDriver.PageSource;
    var parser = new HtmlParser();
    var doc = parser.Parse(html);
    var listings = doc.QuerySelectorAll("li.job-listing");

    // Read the total result count; guard against a missing element before
    // touching TextContent (the original null check came too late to help).
    var findListings = doc.QuerySelector("div.results-context > div > strong");
    int totalListings = 0;
    if (findListings != null)
    {
        int.TryParse(findListings.TextContent, out totalListings);
    }

    int pages = 1;
    addJobs(initialRange);

    // Results come 50 per page; walk the remaining pages, if any.
    if (totalListings > 50)
    {
        int extraPage = totalListings % 50 > 0 ? 1 : 0;
        pages = (int)Math.Floor((decimal)totalListings / 50) + extraPage;
        for (int j = 1; j < pages; j++)
        {
            start = j * 50 + 1;
            pageRange = "&start=" + start + "&count=50";
            addJobs(initialUrl + pageRange);
        }
    }

    void addJobs(string url)
    {
        if (pages > 1)
        {
            // Alternative: chromeDriver.Navigate().GoToUrl(url) on the existing driver.
            // Quit the previous driver before creating a fresh one (the original
            // leaked a ChromeDriver instance per page).
            chromeDriver.Quit();
            options = new ChromeOptions();
            options.AddArgument("--headless");
            options.AddArgument("--incognito");
            options.AddArgument("--ignore-certificate-errors");
            chromeDriver = new ChromeDriver(options);
            chromeDriver.Url = url;
            html = chromeDriver.PageSource;
            parser = new HtmlParser();
            doc = parser.Parse(html);
            listings = doc.QuerySelectorAll("li.job-listing");
        }

        for (int i = 0; i < listings.Length; i++)
        {
            JobPosting job = new JobPosting();
            var listing = listings[i].QuerySelector("div.job-details");
            var checkTitle = listing.QuerySelector("span.job-title-text").TextContent;

            // Keyword filter: keep "Software" roles, drop seniority levels and
            // stacks we are not interested in. This needs to be changed with each search.
            if (!checkTitle.Contains("Senior") && !checkTitle.Contains("Sr") &&
                !checkTitle.Contains("Lead") && !checkTitle.Contains("Principal") &&
                !checkTitle.Contains("Java") && !checkTitle.Contains("Clearance") &&
                !checkTitle.Contains("Graphics") && !checkTitle.Contains("Android") &&
                !checkTitle.Contains("iOS") && !checkTitle.Contains("Wordpress") &&
                !checkTitle.Contains("WordPress") && !checkTitle.Contains("PHP") &&
                // checkTitle.IndexOf("Architect", StringComparison.OrdinalIgnoreCase) != -1 &&
                !checkTitle.Contains("Ruby") && !checkTitle.Contains("Manager") &&
                !checkTitle.Contains("Design") && !checkTitle.Contains("UI") &&
                !checkTitle.Contains("Python") && !checkTitle.Contains("HTML") &&
                !checkTitle.Contains("CSS") && !checkTitle.Contains("Salesforce") &&
                !checkTitle.Contains("SENIOR") && !checkTitle.Contains("Analyst") &&
                !checkTitle.Contains("SR") && checkTitle.Contains("Software"))
            {
                job.JobTitle = checkTitle;
                job.PostDate = listing.QuerySelector("span.date-posted-or-new").TextContent;
                job.Company = listing.QuerySelector("span.company-name-text").TextContent;

                string checkLocation = listing.QuerySelector("span.job-location > span").TextContent;
                job.Location = checkLocation.Contains(", US")
                    ? checkLocation.Replace(", US", "")
                    : checkLocation;

                job.JobDescription = listing.QuerySelector("div.job-description").TextContent;

                // Job link: pull the href off the title anchor and strip the query string.
                XmlDocument xml = new XmlDocument();
                xml.LoadXml(listing.QuerySelector("a.job-title-link").OuterHtml);
                XmlElement elem = xml.DocumentElement;
                if (elem.HasAttribute("href"))
                {
                    string attr = elem.GetAttribute("href");
                    job.Url = attr.Split('?')[0];
                }

                jobs.Add(job);
            }
        }
    }

    chromeDriver.Quit();

    ScraperService scraperService = new ScraperService(ConfigurationManager.ConnectionStrings["LIConnection"].ConnectionString);
    scraperService.Post(jobs);
}