示例#1
0
        static void Main(string[] args)
        {
            ScraperService scraperService = new ScraperService();

            using (var client = new HttpClient())
            {
                // Download the ESPN college-esports page. GetAwaiter().GetResult()
                // surfaces the original exception instead of wrapping it in an
                // AggregateException the way .Result does. (Blocking here is safe:
                // a console Main has no synchronization context to deadlock on.)
                var html = client
                           .GetStreamAsync("http://www.espn.com/esports/story/_/id/21152905/college-esports-list-varsity-esports-programs-north-america")
                           .GetAwaiter().GetResult();
                var parser    = new HtmlParser();
                var document  = parser.Parse(html);
                var tableRows = document.QuerySelectorAll("table.inline-table tr.last");

                List <School_Scraper> results = new List <School_Scraper>();

                // Scrape name / state / athletics out of each table row.
                foreach (var tr in tableRows)
                {
                    var name      = tr.QuerySelector("td");
                    var state     = tr.QuerySelector("td:nth-child(2)");
                    var athletics = tr.QuerySelector("td:nth-child(3)");

                    // Skip malformed rows instead of throwing a
                    // NullReferenceException when a cell is missing.
                    if (name == null || state == null || athletics == null)
                    {
                        continue;
                    }

                    var school = new School_Scraper
                    {
                        Name      = name.TextContent,
                        State     = state.TextContent,
                        Athletics = athletics.TextContent
                    };

                    results.Add(school);

                    // Hand each school to the service one at a time.
                    scraperService.Post(school);
                }

                // Dump the scraped list as JSON for inspection.
                Console.WriteLine(JsonConvert.SerializeObject(results));
            }   //  calls client.Dispose()
        }
示例#2
0
        /// <summary>
        /// Constructor for the shell view model. Hides the progress indicator,
        /// creates the scraper service, caps outbound HTTP connections, and
        /// kicks off the initial server-data load.
        /// </summary>
        public ShellViewModel()
        {
            ProgressCircleVisibility = Visibility.Collapsed;
            scraper = new ScraperService();
            // Limit concurrent connections per endpoint; presumably tuned for the
            // scraper's target server -- TODO confirm 5 is intentional.
            System.Net.ServicePointManager.DefaultConnectionLimit = 5;

            GetServerData();
        }
示例#3
0
        /// <summary>
        /// Start of program: runs one scrape and reports its duration.
        /// </summary>
        /// <returns></returns>
        private static async Task Main()
        {
            var scraper      = new ScraperService();
            var scrapeResult = await scraper.GetFromWebAsync();

            // Elapsed wall-clock time of the scrape, taken from the result itself.
            TimeSpan runTime = scrapeResult.EndTime - scrapeResult.StartTime;
            Console.WriteLine($"Run time of {runTime}");
        }
示例#4
0
        // GET: Obras
        public ActionResult Procesar()
        {
            // Run the scraper for a fresh (empty) user over the hard-coded
            // workbook, reading columns 3 and 4.
            Usuario        usuario  = new Usuario();
            ScraperService servicio = new ScraperService(usuario);

            servicio.ProcesarDocumento("D:/Code/SGO/Proyecto/Archivos/Editando1.xls", 3, 4);

            //return View(await _context.Obra.ToListAsync());
            return View();
        }
        /// <summary>
        /// The main entry point for the application.
        /// Debug builds run the scraper as an interactive console app;
        /// release builds hand it to the Windows Service Control Manager.
        /// </summary>
        public static void Main(string[] args)
        {
            var service = new ScraperService();

#if (DEBUG)
            // Console mode keeps stdout attached so a local run can be watched.
            service.RunAsConsole(args);
#else
            // Production: host the scraper as a Windows service.
            ServiceBase.Run(new ServiceBase[] { service });
#endif
        }
        // NOTE(review): the method name says "should invoke AddAsync on repository",
        // but the only verification is that GetAllAsync was called on the scan-page
        // service. Either the name or the assertion looks stale -- confirm intent.
        public async Task add_async_should_invoke_add_async_on_repository()
        {
            // Arrange
            var scrapService = new ScraperService(_scanPageServiceMock.Object, _adRepository.Object);

            // Act
            await scrapService.ScrapAsync();

            // Assert
            _scanPageServiceMock.Verify(x => x.GetAllAsync(), Times.Once);
        }
        // Starts a scrape of the given week on a thread-pool thread and returns
        // 200 OK immediately; the request does not wait for the scrape to finish.
        public HttpResponseMessage Scrape(int week)
        {
            // All repositories share one context built from the configured
            // connection string.
            var context             = new TimeTableContext(WebConfigurationManager.AppSettings["DbConnectionString"]);
            var scraperRepository   = new ScraperRepository(context);
            var classroomRepository = new ClassroomRepository(context);
            var bookingRepository   = new BookingRepository(context);
            var classRepository     = new ClassRepository(context);
            var scraperService      = new ScraperService(scraperRepository, classroomRepository, classRepository, bookingRepository);

            // NOTE(review): fire-and-forget -- the Task is discarded, so any exception
            // thrown by Scrape(week) is unobserved, and the context is never explicitly
            // disposed. Consider a background-job mechanism that tracks failures.
            Task.Run(() => scraperService.Scrape(week));

            return(Request.CreateResponse(HttpStatusCode.OK));
        }
示例#8
0
 // Returns every test date wrapped in an ItemsResponse; any failure is
 // surfaced to the caller as a 400 carrying the exception message.
 public HttpResponseMessage GetAll()
 {
     try
     {
         var svc  = new ScraperService();
         var resp = new ItemsResponse <TestDatesModel>
         {
             Items = svc.GetAll()
         };

         return Request.CreateResponse(HttpStatusCode.OK, resp);
     }
     catch (Exception e)
     {
         return Request.CreateErrorResponse(HttpStatusCode.BadRequest, e.Message);
     }
 }
示例#9
0
        public void TestScraperInstgram()
        {
            // Mock out the HTTP-client factory and the repository; AddDetails is
            // marked verifiable so the interaction can be checked after the run.
            var httpFactoryMock = new Mock <IHttpClientFactory>();
            var repositoryMock  = new Mock <IApplicationRepository>();

            repositoryMock
            .Setup(r => r.AddDetails(
                       It.IsAny <Application>(),
                       It.IsAny <ApplicationDetails>()))
            .Verifiable();

            // Exercise the scraper through its public interface.
            IScrapperService sut = new ScraperService(repositoryMock.Object, httpFactoryMock.Object);

            sut.ParseApplication(_application);
        }
示例#10
0
        static async Task Main(string[] args)
        {
            var services = new ServiceCollection()
                           .AddLogging(loggingBuilder =>
            {
                // configure Logging with NLog
                loggingBuilder.ClearProviders();
                loggingBuilder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Trace);
                loggingBuilder.AddNLog(Path.Combine(Environment.CurrentDirectory, "nlog.config"));
            });

            NativeInjectorBootStrapper.RegisterServices(services);
            var serviceProvider = services.BuildServiceProvider();

            // GetRequiredService fails fast with a descriptive error when a
            // registration is missing, instead of returning null and crashing
            // later with a NullReferenceException (as GetService would).
            var logger              = serviceProvider.GetRequiredService <ILogger <Program> >();
            var genreRepository     = serviceProvider.GetRequiredService <IGenreRepository>();
            var appDataRepository   = serviceProvider.GetRequiredService <IAppDataRepository>();
            var searchAppRepository = serviceProvider.GetRequiredService <ISearchAppRepository>();
            var uOW = serviceProvider.GetRequiredService <IUnitOfWork>();

            // Apply any pending EF migrations before touching the data.
            serviceProvider.GetRequiredService <SteamGameParseContext>().Database.Migrate();

            logger.LogInformation("Starting application");
            var service = new ScraperService(
                logger,
                genreRepository,
                appDataRepository,
                searchAppRepository,
                uOW
                );

            logger.LogInformation("start app search");

            // await service.SearchGamesAsync();

            // Only retry apps that have not scraped successfully yet; the id cap
            // bounds this run's batch size -- TODO confirm 15 is intentional.
            const int maxAppId = 15;
            var games = searchAppRepository.GetAll()
                        .Where(x => !x.ExecutionResult && x.Id < maxAppId)
                        .ToList();

            foreach (var game in games)
            {
                // Structured template keeps Name as a queryable log property
                // (renders identically to the old interpolated string).
                logger.LogInformation("Scraping app {Name}", game.Name);
                await service.ScrapeAsync(game.Appid);

                // Mark the search record as successfully processed and persist.
                var searchGame = searchAppRepository.GetByAppId(game.Appid);
                searchGame.UploadResult(true);
                searchAppRepository.SaveChanges();
                uOW.Commit();
            }

            logger.LogInformation("Scraping done");
        }
示例#11
0
        // Returns the full scraper list wrapped in an ItemResponse; any failure
        // is surfaced to the caller as a 400 carrying the exception message.
        public HttpResponseMessage GetAll()
        {
            try
            {
                var svc  = new ScraperService();
                var resp = new ItemResponse <ScraperList>
                {
                    Item = svc.GetAll()
                };

                return Request.CreateResponse(HttpStatusCode.OK, resp);
            }
            catch (Exception ex)
            {
                return Request.CreateErrorResponse(HttpStatusCode.BadRequest, ex.Message);
            }
        }
示例#12
0
        public void Success_ScraperTests(string testName, int callCount, string existingShowUdateTime)
        {
            // Arrange: fake HTTP pipeline, show service, and logger.
            var handlerMock     = MockHandler(testName);
            var showServiceMock = new Mock <IShowService>();
            var loggerMock      = new Mock <ILogger <ScraperService> >();

            // When a "last updated" timestamp is supplied, pretend the show
            // already exists with that update time.
            if (!existingShowUdateTime.IsNullOrEmpty())
            {
                showServiceMock
                .Setup(ss => ss.GetShowByShowId(It.IsAny <int>()))
                .Returns(new Show {
                    LastUpdateTime = DateTime.Parse(existingShowUdateTime)
                });
            }

            var httpClient = new HttpClient(handlerMock.Object);
            var sut        = new ScraperService(httpClient, showServiceMock.Object, loggerMock.Object, configuration, new ShowParser());

            // Act
            sut.DoWork(null);

            // Assert: exactly callCount shows were upserted.
            showServiceMock.Verify(ss => ss.Upsert(It.IsAny <Show>()), Times.Exactly(callCount));
        }
示例#13
0
        private async Task ResumeAfterRateIsSet(
            IDialogContext context,
            IAwaitable <long> argument)
        {
            // Record the user's rating and acknowledge it in chat.
            rating = (int)await argument;
            await context.SayAsync(GetMessageAfterRatingSet(rating));

            // Pull the linked article's text down and trim surrounding whitespace.
            var scraper      = new ScraperService();
            var scrappedText = await scraper.ScrapAsync(link);
            var splitter     = new TextSplitter(scrappedText.Trim());

            // Chunk the text so each piece fits the analytics size limit, then
            // extract keywords across all chunks.
            string[] split         = splitter.Split(MaxDocumentLength);
            var      textAnalytics = new TextAnalyticsApiService();
            var      keywords      = await textAnalytics.GetKeywordsAsync(split);

            await context.PostAsync($"Keywords: {String.Join(",", keywords)}");

            context.Done <object>(null);
        }
示例#14
0
 // Wires up the scraper and schedules it: run immediately, then every 15 minutes.
 public ScrapRegistry(IScanPageService scanPageService, IAdRepository adRepository)
 {
     _scraperService = new ScraperService(scanPageService, adRepository);

     Logger.Debug("Execute Scraper Task!");
     Schedule(async() => await Execute())
     .WithName("Scrap")
     .ToRunNow()
     .AndEvery(15)
     .Minutes();
 }
示例#15
0
 /// <summary>Injects the scraper service used by this controller's actions.</summary>
 public HomeController(ScraperService scraperService)
 {
     _scraperService = scraperService;
 }
 /// <summary>Injects the scraper service used by this controller's actions.</summary>
 public DevToolsController(ScraperService scraperService)
 {
     _scraperService = scraperService;
 }
示例#17
0
 /// <summary>Creates the controller with its own ScraperService instance
 /// (no dependency injection in this overload).</summary>
 public ScraperController()
 {
     ScraperService = new ScraperService();
 }
示例#18
0
 /// <summary>
 /// Builds the controller's service and session repository on top of the
 /// supplied data context; all three share the same context instance.
 /// </summary>
 public ScraperController(ScraperContext ctx)
 {
     Ctx            = ctx;
     scraperService = new ScraperService(Ctx);
     sessionRepo    = new SessionRepo(Ctx);
 }
 /// <summary>Injects a pre-built scraper service.</summary>
 public ScraperController(ScraperService scraper)
 {
     _scraper = scraper;
 }
示例#20
0
        static void Main(string[] args)
        {
            List <JobPosting> jobs       = new List <JobPosting>();
            string            initialUrl =
                "https://www.linkedin.com/jobs/search?keywords=Software+Developer&distance=15&locationId=PLACES%2Eus%2E7-1-0-19-99&f_TP=1%2C2&f_E=3%2C2&orig=FCTD&trk=jobs_jserp_facet_exp";

            // Title fragments that disqualify a listing. Checks are ordinal and
            // case-sensitive, exactly like the original chain of Contains calls
            // (hence both "Sr"/"SR" and "Wordpress"/"WordPress" variants).
            // NOTE(review): "Java" also matches "JavaScript" -- preserved as-is.
            string[] excludedTitleTerms =
            {
                "Senior", "Sr", "Lead", "Principal", "Java", "Clearance", "Graphics",
                "Android", "iOS", "Wordpress", "WordPress", "PHP", "Ruby", "Manager",
                "Design", "UI", "Python", "HTML", "CSS", "Salesforce", "SENIOR",
                "Analyst", "SR"
            };

            // Launches a throw-away headless Chrome session, loads the URL, and
            // returns the rendered page source. The driver is disposed on exit;
            // the original created one ChromeDriver per page and never disposed
            // any of them, leaking an orphaned headless Chrome process each time.
            string fetchPageSource(string url)
            {
                ChromeOptions options = new ChromeOptions();
                options.AddArgument("--headless");
                options.AddArgument("--incognito");
                options.AddArgument("--ignore-certificate-errors");

                using (IWebDriver chromeDriver = new ChromeDriver(options))
                {
                    chromeDriver.Url = url;
                    return chromeDriver.PageSource;
                }
            }

            // Parses one results page and appends every listing whose title
            // passes the keyword filter to 'jobs'.
            void collectJobs(string html)
            {
                var parser   = new HtmlParser();
                var doc      = parser.Parse(html);
                var listings = doc.QuerySelectorAll("li.job-listing");

                foreach (var item in listings)
                {
                    var listing = item.QuerySelector("div.job-details");
                    if (listing == null)
                    {
                        continue;   // malformed card; original would have thrown NRE
                    }

                    string checkTitle = listing.QuerySelector("span.job-title-text").TextContent;

                    // Reject titles containing any excluded term; require
                    // "Software" (this needs to be changed with each search).
                    bool excluded = false;
                    foreach (string term in excludedTitleTerms)
                    {
                        if (checkTitle.Contains(term))
                        {
                            excluded = true;
                            break;
                        }
                    }
                    if (excluded || !checkTitle.Contains("Software"))
                    {
                        continue;
                    }

                    JobPosting job = new JobPosting
                    {
                        JobTitle = checkTitle,
                        PostDate = listing.QuerySelector("span.date-posted-or-new").TextContent,
                        Company  = listing.QuerySelector("span.company-name-text").TextContent
                    };

                    // Strip the ", US" suffix so only city/state remains.
                    string checkLocation = listing.QuerySelector("span.job-location > span").TextContent;
                    job.Location = checkLocation.Contains(", US")
                        ? checkLocation.Replace(", US", "")
                        : checkLocation;

                    job.JobDescription = listing.QuerySelector("div.job-description").TextContent;

                    // Pull the href off the title link by round-tripping its outer
                    // HTML through XmlDocument (kept from the original approach);
                    // drop the query string so only the canonical URL is stored.
                    XmlDocument xml = new XmlDocument();
                    xml.LoadXml(listing.QuerySelector("a.job-title-link").OuterHtml);
                    XmlElement elem = xml.DocumentElement;
                    if (elem.HasAttribute("href"))
                    {
                        job.Url = elem.GetAttribute("href").Split('?')[0];
                    }

                    jobs.Add(job);
                }
            }

            // First page: fetched once, used both for the total-results count and
            // as the first batch of listings (the original reused it the same way).
            string firstPageHtml = fetchPageSource(initialUrl + "&start=1&count=50");

            // The results header holds the total match count. Guard the ELEMENT:
            // the original null-checked the string AFTER calling .TextContent,
            // which can never be null and would already have thrown if the
            // element was missing. TryParse with AllowThousands also survives
            // counts rendered as "1,234", where Convert.ToInt32 threw.
            var firstParser  = new HtmlParser();
            var countElement = firstParser.Parse(firstPageHtml)
                               .QuerySelector("div.results-context > div > strong");
            int totalListings = 0;
            if (countElement != null)
            {
                int.TryParse(
                    countElement.TextContent,
                    System.Globalization.NumberStyles.Integer | System.Globalization.NumberStyles.AllowThousands,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out totalListings);
            }

            collectJobs(firstPageHtml);

            // Results come 50 per page; fetch the remaining pages, rounding up.
            if (totalListings > 50)
            {
                int extraPage = (totalListings % 50 > 0) ? 1 : 0;
                int pages     = totalListings / 50 + extraPage;

                for (int j = 1; j < pages; j++)
                {
                    int start = j * 50 + 1;
                    collectJobs(fetchPageSource(initialUrl + "&start=" + start + "&count=50"));
                }
            }

            // Persist everything we collected.
            ScraperService scraperService = new ScraperService(ConfigurationManager.ConnectionStrings["LIConnection"].ConnectionString);

            scraperService.Post(jobs);
        }