// GET api/values
// Scrapes flight data for the built-in sample query with a 3-minute timeout.
public async Task<Response<Roots>> Get()
{
    ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

    // CancellationTokenSource accepts a TimeSpan directly (no manual
    // millisecond conversion) and should be disposed to release its timer.
    using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
    {
        var data = await Scrapper.Create().GetFlightsDataAsync(Query.GetSampleQuery().ToString(), cts.Token);

        // BUG FIX(review): the success flag was 'false' despite the "success"
        // message; the POST counterpart passes 'true' with the same message.
        return Response<Roots>.Create("success", data, true);
    }
}
/// <summary>
/// Entry point: wires configuration, logging, the TvMaze HTTP client with a
/// rate-limit retry policy, DocumentDB storage and the scrapper, then runs
/// the async pipeline to completion.
/// </summary>
static void Main(string[] args)
{
    var config = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json", false, false)
        .Build();

    var loggerFactory = new LoggerFactory().AddConsole().AddFile("logs/ts-{Date}.txt"); // removed stray ';;'
    var logger = loggerFactory.CreateLogger<Program>();

    var httpClient = new HttpClient();

    // Tune connection behaviour for many small concurrent requests.
    ServicePointManager.UseNagleAlgorithm = false;
    ServicePointManager.Expect100Continue = false;
    ServicePointManager.DefaultConnectionLimit = 100;

    // Retry forever (every 5s) on HTTP 429 Too Many Requests.
    var retryPolicy = Policy
        .HandleResult<HttpResponseMessage>(e => e.StatusCode == (System.Net.HttpStatusCode)429)
        .WaitAndRetryForeverAsync(attempt => TimeSpan.FromSeconds(5));

    var tvMazeClient = new TvMazeClient(httpClient, retryPolicy);

    var storageDbClient = new DocumentClient(new Uri(config["StorageEndpoint"]), config["StorageKey"]);
    logger.LogInformation($"Storage db URI {config["StorageEndpoint"]}");
    var storage = new Storage(config, storageDbClient);

    var scrapper = new Scrapper(tvMazeClient, storage, loggerFactory, int.Parse(config["DegreeOfParallelism"]));
    logger.LogInformation("Initialized all objects. Starting process of grabbing info.");

    // Blocking on the async pipeline is acceptable here: Main is the
    // process root and cannot itself be async in this language version.
    MainAsync(scrapper, logger).GetAwaiter().GetResult();
}
// Verifies every extracted field of the FIRST review in the two-comment fixture.
public void CheckOn2HtmlCommentFirstComment()
{
    String htmlData = File.ReadAllText($"HtmlData/TwoComments.html");
    DataHawk.TechTest.Scrapping.Scrapper scrapper = new Scrapper();

    List<IElement> listOfHtmlComments = scrapper.GetListOfHtmlComment(htmlData);
    var review = scrapper.ExtractComment(listOfHtmlComments.First().OuterHtml);

    // FIX(review): use NFluent's IsEqualTo() instead of .Equals() — on an
    // ICheck<T>, Equals resolves to Object.Equals whose bool result is
    // silently discarded, so these assertions may never fail.
    Check.That(review.Title).IsEqualTo("FM Radio still not active in the US unlocked version");
    Check.That(review.Comment).StartsWith("Despite Samsung's promises,");
    Check.That(review.Comment).EndsWith("or working update real soon. *");
    Check.That(review.Author).IsEqualTo("Ta");
    Check.That(review.NbPeopleFindHelpful).IsEqualTo(159);
    Check.That(review.VerifiedPurchase).IsEqualTo(true);

    var expectedDate = new DateTime(2020, 3, 8);
    Check.That(review.ReviewDate).
        IsInSameYearAs(expectedDate).And.
        IsInSameMonthAs(expectedDate).And.
        IsInSameDayAs(expectedDate);

    Check.That(review.NbComment).IsEqualTo(13);
    Check.That(review.Star).IsEqualTo(1);
}
// This method gets called by the runtime. Use this method to add services to the container.
public void ConfigureServices(IServiceCollection services)
{
    var scrapper = new Scrapper();

    // Database singleton: open the on-disk store and, if it is empty on
    // first resolution, seed it synchronously via the scrapper.
    services.AddSingleton(provider =>
    {
        var db = new CelebsDB(Path.Combine(Environment.CurrentDirectory, "data"));
        if (db.Celebs.Count == 0)
        {
            Task.Run(() => Helpers.LoadData(db, scrapper)).Wait();
        }
        return db;
    });

    // Share the single scrapper instance across the container.
    services.AddSingleton(provider => scrapper);

    // Console logging at Debug verbosity.
    services.AddLogging(logging => logging.AddConsole().SetMinimumLevel(LogLevel.Debug));

    services.AddMvc().SetCompatibilityVersion(CompatibilityVersion.Version_2_2);
}
// Scrapes the URL posted in the form and surfaces the counts via ViewData.
public ActionResult GetUrl(FormCollection collection)
{
    try
    {
        var scrape = new Scrapper();
        var url = collection["Name"];

        string[] results = { };
        scrape.ScrapeWebPage(url, out results);

        // results layout (by observed indexing): [0]=last URL, [1]=divs, [2]=spans, [3]=links.
        ViewData["Message"] = "Success";
        ViewData["Divs"] = results[1];
        ViewData["Spans"] = results[2];
        ViewData["Links"] = results[3];
        ViewData["Last URL"] = results[0];
        ViewData["UrlsCount"] = Scrapper.countDBDocs();
    }
    catch
    {
        // Deliberate best-effort: any scraping failure is reported to the
        // view as "Failed" instead of bubbling up to the user.
        ViewData["Message"] = "Failed";
    }
    return View("~/Views/Home/index.cshtml");
}
// Builds a module attribute describing an HTML link element, including its
// business/configuration parameters and technical identification params.
public void SetupAttributes(Tuple<XModule, XModuleAttribute, bool> modulePack, IHTMLElement link)
{
    var checkValidLink = link.getAttribute("href");

    // BUG FIX(review): getAttribute can return null when the anchor has no
    // href, which previously threw on .Contains() before the == "" check ran.
    // Treat a missing/empty href like an invalid link and skip it.
    if (checkValidLink == null || checkValidLink == "" ||
        checkValidLink.Contains("javascript()") || link.innerText == null)
    {
        return;
    }

    // Strip embedded newlines from the link text; skip links with no visible text.
    var rgx = new Regex("\n");
    var innerText = rgx.Replace(link.innerText, "").Trim();
    if (innerText == "")
    {
        return;
    }

    // Item3 selects whether the attribute hangs off the module or the parent attribute.
    var attribute = modulePack.Item3
        ? modulePack.Item1.CreateModuleAttribute()
        : modulePack.Item2.CreateModuleAttribute();

    attribute.DefaultActionMode = XTestStepActionMode.Input;
    attribute.Name = innerText;
    attribute.BusinessType = "Link";

    AddBusinessParam(attribute.CreateConfigurationParam(), "Engine", "Html");
    AddBusinessParam(attribute.CreateConfigurationParam(), "BusinessAssociation", "Descendants");

    // One technical-ID param per scraped technical property, plus the inner text.
    foreach (var technical in Scrapper.DecideForTechnicals(link))
    {
        AddBusinessParam(attribute.CreateTechnicalIDParam(), technical.Key, technical.Value);
    }
    AddBusinessParam(attribute.CreateTechnicalIDParam(), "InnerText", innerText);
}
// Runs a fresh scrape and returns the precipitation table it collected.
public static Dictionary<string, string> GetPrecipitation()
{
    var weatherScrapper = new Scrapper();
    weatherScrapper.Scrap();
    return weatherScrapper.Precipitacion;
}
//TODO: this should return Mendeley objects instead of the raw. So, parse the HTML.
// Queries the Mendeley search page for the given filter and data types.
public static IEnumerable<string> Search(string Filter, MendeleyDataType[] Types = null)
{
    // BUG FIX(review): 'Types' defaults to null, so Types.Parse() threw a
    // NullReferenceException on the default call unless Parse() happens to be
    // a null-tolerant extension — TODO confirm Parse()'s output for an empty array.
    var types = Types ?? new MendeleyDataType[0];
    string URL = $"{MendeleyURL}?query={WebUtility.UrlEncode(Filter)}&page=0&{types.Parse()}";
    var result = Scrapper.GetNodesFromURLByClass(URL, SearchResultHeader);

    // NOTE(review): 'result' is fetched but discarded and null is returned —
    // parsing into objects is still pending per the TODO above.
    return null;
}
// Quartz job: scrapes cinema names/addresses from Google Movies and persists
// each one through the cinema gateway.
public void Execute(IJobExecutionContext context)
{
    //Data crawling codes here
    System.Diagnostics.Debug.WriteLine("Executing job...");

    Scrapper scrapper = new Scrapper();
    scrapper.scrapCinemaName("https://www.google.com/movies?near=singapore&rl=1&stok=ABAPP2tdNR_5cLRa-6emW2UtecEL44SX2A%3A1456036737594");
    List<Cinema> cinemaList = scrapper.getCinemaNames();

    // FIX(review): iterate with a for-loop instead of a hand-rolled reverse
    // while, use List.Count rather than LINQ Count(), and create a fresh
    // Cinema per row — the original reused one mutable instance for every
    // Insert, which misbehaves with gateways that retain the object.
    for (int i = cinemaList.Count - 1; i >= 0; i--)
    {
        System.Diagnostics.Debug.WriteLine("size: " + i);

        var cinema = new Cinema();
        cinema.CinemaName = cinemaList[i].CinemaName;
        System.Diagnostics.Debug.WriteLine("Cinena Name: " + cinema.CinemaName);
        cinema.CinemaAddress = cinemaList[i].CinemaAddress;
        System.Diagnostics.Debug.WriteLine("Cinema Address: " + cinema.CinemaAddress);

        cinemaGateway.Insert(cinema);
    }

    System.Diagnostics.Debug.WriteLine("Job ended... ");
}
// Fetches the per-day weather description via a fresh scrape.
public static Dictionary<string, string> GetDescription()
{
    var weatherScrapper = new Scrapper();
    weatherScrapper.GetDescripcionclima();
    return weatherScrapper.DescripcionDia;
}
// Verifies every extracted field of the SECOND review in the two-comment fixture.
public void CheckOn2HtmlCommentSecondComment()
{
    String htmlData = File.ReadAllText($"HtmlData/TwoComments.html");
    DataHawk.TechTest.Scrapping.Scrapper scrapper = new Scrapper();

    List<IElement> listOfHtmlComments = scrapper.GetListOfHtmlComment(htmlData);
    var review = scrapper.ExtractComment(listOfHtmlComments[1].OuterHtml);

    // FIX(review): use NFluent's IsEqualTo() instead of .Equals() — on an
    // ICheck<T>, Equals resolves to Object.Equals whose bool result is
    // silently discarded, so these assertions may never fail.
    Check.That(review.Title).IsEqualTo("Incomplete shipment");
    Check.That(review.Comment).StartsWith("Didn't come with the offered Buds");
    Check.That(review.Comment).EndsWith("What's up!!");
    Check.That(review.Author).IsEqualTo("Ricardo Wagner");
    Check.That(review.NbPeopleFindHelpful).IsEqualTo(134);
    Check.That(review.VerifiedPurchase).IsEqualTo(true);

    var expectedDate = new DateTime(2020, 3, 10);
    Check.That(review.ReviewDate).
        IsInSameYearAs(expectedDate).And.
        IsInSameMonthAs(expectedDate).And.
        IsInSameDayAs(expectedDate);

    Check.That(review.NbComment).IsEqualTo(3);
    Check.That(review.Star).IsEqualTo(3);
}
// Runs a fresh scrape and returns the maximum-temperature table it collected.
public static Dictionary<string, string> GetMaxTemperature()
{
    var weatherScrapper = new Scrapper();
    weatherScrapper.Scrap();
    return weatherScrapper.TemperaturaMaxima;
}
// Requests up to 2000 tweets for the keyword and verifies the scraper never
// exceeds its upper bound of (MaxCount - 1) + CountPerQuery results.
public void Test100Query()
{
    var queryInfo = new TweetSearchQuery("나이키", maxTweetCount: 2000);
    var scrapper = new Scrapper(_token, queryInfo);

    var tweets = scrapper.Scrap();

    var upperBound = (queryInfo.MaxCount - 1) + queryInfo.CountPerQuery;
    Assert.IsTrue(tweets.Count() <= upperBound);
}
// Periodic scraper for Swiss car listings: configures its schedule, batch
// size, repository and web-scraper collaborators.
public CarScrapperSwissCars(IServiceScopeFactory factory)
{
    TimeFrequency = 2850000; //47min
    VehiclesToAddAtOnce = 20;
    // NOTE(review): the scope created here is never disposed and is
    // immediately abandoned — the resolved repository is kept for this
    // object's lifetime while the scope (and any scoped services in it)
    // leaks. Consider holding the scope in a field and disposing it with
    // this instance; confirm intended lifetime before changing.
    Repo = factory.CreateScope().ServiceProvider.GetRequiredService<IRepositoryVehicle>();
    WebScrp = new Scrapper();
    this.WebScrpSwiss = new WebScrapperSwissCars();
}
// GET api/values
// Scrapes the itineraries for the built-in sample query with a 3-minute timeout.
public async Task<Response<List<Itinerary>>> Get()
{
    ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

    // CancellationTokenSource accepts a TimeSpan directly (no manual
    // millisecond conversion); dispose it to release the underlying timer.
    // (Removed a stale commented-out alternative implementation.)
    using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
    {
        var itineraries = await Scrapper.Create().GetItinerariesAsync(Query.GetSampleQuery(), cts.Token);
        return Response<List<Itinerary>>.Create(itineraries, true);
    }
}
// POST api/values
// Synchronously scrapes flight routes for the posted query.
public Response<Routes> Post([FromBody] Query query)
{
    // Guard clause: reject missing payloads before doing any scraping work.
    if (query == null)
    {
        return Response<Routes>.Create("Invalid Request", null, false);
    }

    var flightData = Scrapper.Create().GetFlightData(query);
    return Response<Routes>.Create(flightData, true);
}
// Verifies the total comment count parsed from the full-review-page fixture.
public void CheckGetNumberOfComment()
{
    String htmlData = File.ReadAllText($"HtmlData/FullPageOfReview.html");
    DataHawk.TechTest.Scrapping.Scrapper scrapper = new Scrapper();

    Int32 nbComments = scrapper.GetNbComments(htmlData);

    // FIX(review): use NFluent's IsEqualTo() instead of .Equals(), which
    // resolves to Object.Equals and discards the bool — a silent no-op.
    Check.That(nbComments).IsEqualTo(86);
}
// Verifies that a review from an unverified purchase is flagged as such.
public void CheckUnverifiedPurchase()
{
    String htmlData = File.ReadAllText($"HtmlData/ReviewFromUnverifiedPurchase.html");
    DataHawk.TechTest.Scrapping.Scrapper scrapper = new Scrapper();

    Review review = scrapper.ExtractComment(htmlData);

    // FIX(review): use NFluent's IsEqualTo() instead of .Equals(), which
    // resolves to Object.Equals and discards the bool — a silent no-op.
    Check.That(review.VerifiedPurchase).IsEqualTo(false);
}
// The two-comment fixture must yield exactly two scraped comment nodes.
public void CheckThat2CommentsOnHtml()
{
    var htmlData = File.ReadAllText($"HtmlData/TwoComments.html");
    var scrapper = new Scrapper();

    var listOfHtmlComments = scrapper.GetListOfHtmlComment(htmlData);

    Check.That(listOfHtmlComments).HasSize(2);
}
// A full review page must yield the standard page size of ten comment nodes.
public void CheckFullPageOfReviewNbReview()
{
    var htmlData = File.ReadAllText($"HtmlData/FullPageOfReview.html");
    var scrapper = new Scrapper();

    var listOfHtmlComments = scrapper.GetListOfHtmlComment(htmlData);

    Check.That(listOfHtmlComments).HasSize(10);
}
// POST api/values
// Scrapes flight data for the posted query with a 3-minute timeout.
public async Task<Response<Roots>> Post([FromBody] Query query)
{
    // Guard clause: reject missing payloads before configuring the browser.
    if (query == null)
    {
        return Response<Roots>.Create("Invalid Request", null, false);
    }

    ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

    // CancellationTokenSource accepts a TimeSpan directly (no manual
    // millisecond conversion); dispose it to release the underlying timer.
    using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
    {
        var data = await Scrapper.Create().GetFlightsDataAsync(query.ToString(), cts.Token);
        return Response<Roots>.Create("success", data, true);
    }
}
// Scrapes with the default search query and verifies the result count stays
// within the documented upper bound.
public void TestDefaultQuery()
{
    var queryInfo = new TweetSearchQuery("나이키");
    var scrapper = new Scrapper(_token, queryInfo);

    var tweets = scrapper.Scrap();

    // Maximum count: (MaxScrapTweetCount - 1) + TweetCountPerPage
    var upperBound = (queryInfo.MaxCount - 1) + queryInfo.CountPerQuery;
    Assert.IsTrue(tweets.Count() <= upperBound);
}
// Wires the scrapper, hooks its progress notifications, styles the console
// and dispatches the process arguments to Run / HandleParseError.
public ScrapperController()
{
    Scrapper = new Scrapper();
    Scrapper.Notifier += scrapper_Notifier;

    StyleConsole();

    Parser.Default
        .ParseArguments<Options>(Program.Arguments)
        .WithParsed(Run)
        .WithNotParsed(HandleParseError);
}
// Downloads high-resolution maps for every entry in the 2001 DBF list,
// honouring the skip-radio configuration from the form.
private void btnImprove_Click(object sender, EventArgs e)
{
    var scrapper = new Scrapper();
    LoadSkipRadios(scrapper);

    var dbfFile = Path.Combine(Context.InputDataDirectory, @"todos2001.dbf");
    scrapper.GetHiResMapsFromDbfList(dbfFile, false);

    Console.WriteLine("Listo");
    MessageBox.Show(this, "Listo");
}
// POST api/values
// Scrapes the itineraries for the posted query with a 3-minute timeout.
public async Task<Response<List<Itinerary>>> Post([FromBody] Query query)
{
    // Guard clause: reject missing payloads before configuring the browser.
    if (query == null)
    {
        return Response<List<Itinerary>>.Create("Invalid Request");
    }

    ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

    // CancellationTokenSource accepts a TimeSpan directly (no manual
    // millisecond conversion); dispose it to release the underlying timer.
    using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
    {
        var itineraries = await Scrapper.Create().GetItinerariesAsync(query, cts.Token);
        return Response<List<Itinerary>>.Create(itineraries, true);
    }
}
// Scrapes the default user-timeline query for @twitterapi.
public void TestDefaultQuery()
{
    var queryInfo = new UserTimelineQuery() { ScreenName = "twitterapi" };
    var scrapper = new Scrapper(_token, queryInfo);
    var tweets = scrapper.Scrap();
    // Maximum count: (MaxScrapTweetCount - 1) + TweetCountPerPage
    // NOTE(review): the assertion below is disabled, so this test only
    // verifies that Scrap() completes without throwing — it asserts nothing
    // about the count. The property names in the disabled line
    // (MaxTweetCount / TweetCountPerPage) also differ from the search-query
    // tests (MaxCount / CountPerQuery) — confirm the correct names on
    // UserTimelineQuery and re-enable the check.
    //Assert.IsTrue(tweets.Count() <= (queryInfo.MaxTweetCount - 1) + queryInfo.TweetCountPerPage);
}
// Configures log4net, assembles the scraping pipeline and awaits its run.
private static async Task RunScrappingAsync(IDataHandler<Person> handler, InputDataProvider inputData)
{
    // log4net is configured from the XML file bundled with the entry assembly.
    var logRepository = LogManager.GetRepository(Assembly.GetEntryAssembly());
    XmlConfigurator.Configure(logRepository, new FileInfo(configLoggingFileName));
    ILog log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);

    // Build the manager around the generic Person scrapper and forward its
    // progress notifications to the local handler.
    IScrapper<Person> scrapper = new Scrapper<Person>();
    IScrapersManager scrapperManager = new ScrapersManager(inputData, handler, scrapper, log);
    scrapperManager.Notify += ScrapperManagerNotify;

    await scrapperManager.ScrapDataAsync();
}
// End-to-end scrape against canned pages: the fake downloader serves the
// landing and booking pages so the whole flow runs offline.
public async Task TestWorkflow()
{
    const string bookingPageUrl = "https://harrypottertheplay.nimaxtheatres.com/hpcc/WEBPAGES/EntaWebGateway/gateway.aspx?E=N&QL=S2728|RCAR1|VPAL|G~/WEBPAGES/EntaWebShow/ShowPerformance.aspx";

    var downloader = new PageDownloaderFake();
    downloader.SetPage(Scrapper.LandingPageUrl, File.ReadAllText(@"Resources\LandingPage.html"));
    downloader.SetPage(bookingPageUrl, File.ReadAllText(@"Resources\BookingPage.html"));

    var scrapper = new Scrapper(downloader);
    var performances = await scrapper.DownloadPerformances();

    Assert.NotEmpty(performances);
    // The scrapper must leave the downloader back in its initial state.
    Assert.True(downloader.IsInInitialState);
}
// Stubs every collaborator so Scrapper can be exercised in isolation.
public ScrapperTests()
{
    _configMock = new Mock<IConfiguration>();
    _httpClientMock = new Mock<IHttpClientProvider>();
    _nodeParserMock = new Mock<INodeProcessor>();
    _areaFinder = new Mock<IGeoAreaFinder>();

    // Any URL yields a fixed document...
    _httpClientMock
        .Setup(mock => mock.GetHtmlDocumentWithProxy(It.IsAny<string>()))
        .Returns("some_html_document");

    // ...from which the parser extracts fixed vessel data.
    _nodeParserMock.Setup(mock => mock.ExtractMmsiFromHtml(It.IsAny<string>())).Returns(11111111);
    _nodeParserMock.Setup(mock => mock.ExtractLatFromHtml(It.IsAny<string>())).Returns(11.111);
    _nodeParserMock.Setup(mock => mock.ExtractLonFromHtml(It.IsAny<string>())).Returns(12.112);
    _nodeParserMock
        .Setup(mock => mock.ExtractAisUpdateTimeFromHtml(It.IsAny<string>(), It.IsAny<string>()))
        .Returns(new DateTime(2020, 01, 01));

    _service = new Scrapper(_configMock.Object, _httpClientMock.Object, _nodeParserMock.Object, _areaFinder.Object);
}
// Configures the scrapper from the filter text boxes, then downloads every
// map listed in the 2001 DBF file.
private void btnGet_Click(object sender, EventArgs e)
{
    var scrapper = new Scrapper
    {
        ProvFilter = txtProv.Text,
        DptoFilter = txtDepto.Text,
        FracFilter = txtFrac.Text,
        RadioFilter = txtRadio.Text
    };
    //LoadSkipRadios(scrapper);

    var dbfFile = Path.Combine(Context.InputDataDirectory, @"todos2001.dbf");
    scrapper.GetMapsFromDbfList(dbfFile);

    Console.WriteLine("Listo");
    MessageBox.Show(this, "Listo");
}