Exemplo n.º 1
0
        // GET api/values
        /// <summary>
        /// Runs the sample query through the scrapper and wraps the flight data in a
        /// <c>Response&lt;Roots&gt;</c> envelope.
        /// </summary>
        /// <returns>The response built by <c>Response&lt;Roots&gt;.Create</c> with the message "success".</returns>
        public async Task <Response <Roots> > Get()
        {
            ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

            // A CancellationTokenSource created with a timeout owns a timer, so it is
            // disposed as soon as the scrape has finished (or failed).
            using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
            {
                var flights = await Scrapper.Create().GetFlightsDataAsync(Query.GetSampleQuery().ToString(), cts.Token);

                // NOTE(review): the last argument is false here although the message is "success" - confirm intent.
                return(Response <Roots> .Create("success", flights, false));
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Console entry point: loads <c>appsettings.json</c>, builds the logger, the TvMaze client,
        /// the storage client and the scrapper, then blocks until <c>MainAsync</c> has completed.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            var config = new ConfigurationBuilder()
                         .SetBasePath(Directory.GetCurrentDirectory())
                         .AddJsonFile("appsettings.json", false, false)
                         .Build();
            var loggerFactory = new LoggerFactory().AddConsole().AddFile("logs/ts-{Date}.txt");
            var logger        = loggerFactory.CreateLogger <Program>();

            var httpClient = new HttpClient();

            ServicePointManager.UseNagleAlgorithm      = false;
            ServicePointManager.Expect100Continue      = false;
            ServicePointManager.DefaultConnectionLimit = 100;

            // Retry forever, five seconds apart, while the server answers 429 (Too Many Requests).
            var retryPolicy =
                Policy
                .HandleResult <HttpResponseMessage>(e => e.StatusCode == (System.Net.HttpStatusCode) 429)
                .WaitAndRetryForeverAsync(attempt => TimeSpan.FromSeconds(5));

            var tvMazeClient    = new TvMazeClient(httpClient, retryPolicy);
            var storageDbClient = new DocumentClient(new Uri(config["StorageEndpoint"]), config["StorageKey"]);

            // Message template instead of string interpolation: the endpoint is passed as a
            // structured value and is never interpreted as part of the format string.
            logger.LogInformation("Storage db URI {StorageEndpoint}", config["StorageEndpoint"]);
            var storage = new Storage(config, storageDbClient);

            // Report a missing or malformed setting by name instead of a bare parse exception.
            int degreeOfParallelism;
            if (!int.TryParse(config["DegreeOfParallelism"], out degreeOfParallelism))
            {
                throw new InvalidOperationException("Configuration value 'DegreeOfParallelism' is missing or is not a valid integer.");
            }

            var scrapper = new Scrapper(tvMazeClient, storage, loggerFactory, degreeOfParallelism);

            logger.LogInformation("Initialized all objects. Starting process of grabbing info.");

            // Main is synchronous, so block here until the asynchronous work is done.
            MainAsync(scrapper, logger).GetAwaiter().GetResult();
        }
        /// <summary>
        /// Extracts the first review from the two-comment fixture and checks each of its fields.
        /// </summary>
        public void CheckOn2HtmlCommentFirstComment()
        {
            // Arrange
            string fixtureHtml = File.ReadAllText("HtmlData/TwoComments.html");
            DataHawk.TechTest.Scrapping.Scrapper parser = new Scrapper();

            // Act
            List <IElement> commentNodes = parser.GetListOfHtmlComment(fixtureHtml);
            string firstCommentHtml      = commentNodes.First().OuterHtml;
            var firstReview              = parser.ExtractComment(firstCommentHtml);

            // Assert
            Check.That(firstReview.Title).Equals("FM Radio still not active in the US unlocked version");
            Check.That(firstReview.Comment).StartsWith("Despite Samsung's promises,");
            Check.That(firstReview.Comment).EndsWith("or working update real soon. *");
            Check.That(firstReview.Author).Equals("Ta");
            Check.That(firstReview.NbPeopleFindHelpful).Equals(159);
            Check.That(firstReview.VerifiedPurchase).Equals(true);

            var expectedDate = new DateTime(2020, 3, 8);
            Check.That(firstReview.ReviewDate)
                 .IsInSameYearAs(expectedDate)
                 .And.IsInSameMonthAs(expectedDate)
                 .And.IsInSameDayAs(expectedDate);

            Check.That(firstReview.NbComment).Equals(13);
            Check.That(firstReview.Star).Equals(1);
        }
Exemplo n.º 4
0
        // This method gets called by the runtime. Use this method to add services to the container.
        /// <summary>
        /// Registers the celebrity database, the shared scrapper, console logging and MVC.
        /// </summary>
        /// <param name="services">The service collection to add the registrations to.</param>
        public void ConfigureServices(IServiceCollection services)
        {
            // One scrapper instance is shared by the database factory and the container.
            var scrapper = new Scrapper();

            services.AddSingleton(provider =>
            {
                var dataDirectory = Path.Combine(Environment.CurrentDirectory, "data");
                var db            = new CelebsDB(dataDirectory);

                if (db.Celebs.Count != 0)
                {
                    return(db);
                }

                // Empty database: block until the initial load has run.
                Task.Run(() => Helpers.LoadData(db, scrapper)).Wait();

                return(db);
            });

            services.AddSingleton(provider => scrapper);

            services.AddLogging(logging => logging.AddConsole().SetMinimumLevel(LogLevel.Debug));

            services.AddMvc().SetCompatibilityVersion(CompatibilityVersion.Version_2_2);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Scrapes the URL posted in the form field "Name" and exposes the results to the index view.
        /// </summary>
        /// <param name="collection">Posted form values; the URL is read from the key "Name".</param>
        /// <returns>
        /// The index view. <c>ViewData["Message"]</c> is "Success" when scraping and the document
        /// count both succeeded, otherwise "Failed".
        /// </returns>
        public ActionResult GetUrl(FormCollection collection)
        {
            const string indexView = "~/Views/Home/index.cshtml";

            try
            {
                Scrapper scrape = new Scrapper();
                string   url    = collection["Name"];
                string[] results;

                scrape.ScrapeWebPage(url, out results);

                // Read everything that can throw first, so a failure never leaves
                // ViewData half-populated with "Success" data.
                string lastUrl   = results[0];
                string divs      = results[1];
                string spans     = results[2];
                string links     = results[3];
                var    urlsCount = Scrapper.countDBDocs();

                ViewData["Message"]   = "Success";
                ViewData["Divs"]      = divs;
                ViewData["Spans"]     = spans;
                ViewData["Links"]     = links;
                ViewData["Last URL"]  = lastUrl;
                ViewData["UrlsCount"] = urlsCount;
            }
            catch (System.Exception ex)
            {
                // Still best-effort for the user, but the cause is now traced instead of silently dropped.
                System.Diagnostics.Trace.TraceError("GetUrl failed: {0}", ex);
                ViewData["Message"] = "Failed";
            }

            return(View(indexView));
        }
Exemplo n.º 6
0
        /// <summary>
        /// Creates a "Link" module attribute for an HTML anchor and fills in its configuration
        /// and technical-ID parameters.
        /// </summary>
        /// <param name="modulePack">
        /// Item1 is the module, Item2 a parent attribute; Item3 selects which of the two
        /// creates the new attribute (true = Item1, false = Item2).
        /// </param>
        /// <param name="link">The HTML element to describe. Elements without a usable href or text are skipped.</param>
        public void SetupAttributes(Tuple <XModule, XModuleAttribute, bool> modulePack, IHTMLElement link)
        {
            // A missing href comes back as null; treat it like an empty one instead of dereferencing it.
            var href    = link.getAttribute("href") as string;
            var rawText = link.innerText;

            // NOTE(review): "javascript()" is matched literally - confirm this is the intended marker.
            if (string.IsNullOrEmpty(href) || href.Contains("javascript()") || rawText == null)
            {
                return;
            }

            // Plain string replace: no need to build a Regex on every call for a literal newline.
            var innerText = rawText.Replace("\n", "").Trim();

            if (innerText.Length == 0)
            {
                return;
            }

            var attribute = modulePack.Item3 ? modulePack.Item1.CreateModuleAttribute() : modulePack.Item2.CreateModuleAttribute();

            attribute.DefaultActionMode = XTestStepActionMode.Input;
            attribute.Name = innerText;

            attribute.BusinessType = "Link";
            AddBusinessParam(attribute.CreateConfigurationParam(), "Engine", "Html");
            AddBusinessParam(attribute.CreateConfigurationParam(), "BusinessAssociation", "Descendants");

            foreach (var technical in Scrapper.DecideForTechnicals(link))
            {
                AddBusinessParam(attribute.CreateTechnicalIDParam(), technical.Key, technical.Value);
            }

            AddBusinessParam(attribute.CreateTechnicalIDParam(), "InnerText", innerText);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Runs <c>Scrap()</c> on a new scrapper and returns its <c>Precipitacion</c> dictionary.
        /// </summary>
        /// <returns>The <c>Precipitacion</c> value of the scrapper after scraping.</returns>
        public static Dictionary <string, string> GetPrecipitation()
        {
            Scrapper source = new Scrapper();
            source.Scrap();

            Dictionary <string, string> precipitation = source.Precipitacion;
            return(precipitation);
        }
        //TODO: this should return Mendeley objects instead of the raw. So, parse the HTML.
        /// <summary>
        /// Builds the Mendeley search URL for <paramref name="Filter"/> and requests the nodes
        /// matching <c>SearchResultHeader</c>. The nodes are not parsed yet (see TODO).
        /// </summary>
        /// <param name="Filter">Search text; URL-encoded into the <c>query</c> parameter.</param>
        /// <param name="Types">
        /// Data types appended to the query string via <c>Types.Parse()</c>.
        /// NOTE(review): the default is null - presumably <c>Parse</c> is an extension method that tolerates null; confirm.
        /// </param>
        /// <returns>An empty sequence until result parsing is implemented; never null.</returns>
        public static IEnumerable <string> Search(string Filter, MendeleyDataType[] Types = null)
        {
            string URL = $"{MendeleyURL}?query={WebUtility.UrlEncode(Filter)}&page=0&{Types.Parse()}";

            // The request is still issued, as before; its result is discarded until the TODO is done.
            Scrapper.GetNodesFromURLByClass(URL, SearchResultHeader);

            // Empty instead of null, so callers can enumerate the result safely.
            return(new string[0]);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Job body: scrapes the cinema list and inserts each cinema through <c>cinemaGateway</c>,
        /// walking the list from the last entry to the first.
        /// </summary>
        /// <param name="context">Job execution context; not read by this method.</param>
        public void Execute(IJobExecutionContext context)
        {
            //Data crawling codes here
            System.Diagnostics.Debug.WriteLine("Executing job...");
            Scrapper scrapper = new Scrapper();

            scrapper.scrapCinemaName("https://www.google.com/movies?near=singapore&rl=1&stok=ABAPP2tdNR_5cLRa-6emW2UtecEL44SX2A%3A1456036737594");
            List <Cinema> cinemaList = scrapper.getCinemaNames();

            for (int index = cinemaList.Count - 1; index >= 0; index--)
            {
                System.Diagnostics.Debug.WriteLine("size: " + index);

                // A fresh entity per row: reusing one instance means every Insert call
                // receives the same object, which breaks gateways that keep the reference.
                Cinema cinema = new Cinema();

                cinema.CinemaName = cinemaList[index].CinemaName;
                System.Diagnostics.Debug.WriteLine("Cinena Name: " + cinema.CinemaName);
                cinema.CinemaAddress = cinemaList[index].CinemaAddress;
                System.Diagnostics.Debug.WriteLine("Cinema Address: " + cinema.CinemaAddress);

                cinemaGateway.Insert(cinema);
            }

            System.Diagnostics.Debug.WriteLine("Job ended... ");
        }
Exemplo n.º 10
0
        /// <summary>
        /// Runs <c>GetDescripcionclima()</c> on a new scrapper and returns its <c>DescripcionDia</c> dictionary.
        /// </summary>
        /// <returns>The <c>DescripcionDia</c> value of the scrapper after the call.</returns>
        public static Dictionary <string, string> GetDescription()
        {
            Scrapper source = new Scrapper();
            source.GetDescripcionclima();

            Dictionary <string, string> descriptions = source.DescripcionDia;
            return(descriptions);
        }
        /// <summary>
        /// Extracts the second review from the two-comment fixture and checks each of its fields.
        /// </summary>
        public void CheckOn2HtmlCommentSecondComment()
        {
            // Arrange
            string fixtureHtml = File.ReadAllText("HtmlData/TwoComments.html");
            DataHawk.TechTest.Scrapping.Scrapper parser = new Scrapper();

            // Act
            List <IElement> commentNodes = parser.GetListOfHtmlComment(fixtureHtml);
            string secondCommentHtml     = commentNodes[1].OuterHtml;
            var secondReview             = parser.ExtractComment(secondCommentHtml);

            // Assert
            Check.That(secondReview.Title).Equals("Incomplete shipment");
            Check.That(secondReview.Comment).StartsWith("Didn't come with the offered Buds");
            Check.That(secondReview.Comment).EndsWith("What's up!!");
            Check.That(secondReview.Author).Equals("Ricardo Wagner");
            Check.That(secondReview.NbPeopleFindHelpful).Equals(134);
            Check.That(secondReview.VerifiedPurchase).Equals(true);

            var expectedDate = new DateTime(2020, 3, 10);
            Check.That(secondReview.ReviewDate)
                 .IsInSameYearAs(expectedDate)
                 .And.IsInSameMonthAs(expectedDate)
                 .And.IsInSameDayAs(expectedDate);

            Check.That(secondReview.NbComment).Equals(3);
            Check.That(secondReview.Star).Equals(3);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Runs <c>Scrap()</c> on a new scrapper and returns its <c>TemperaturaMaxima</c> dictionary.
        /// </summary>
        /// <returns>The <c>TemperaturaMaxima</c> value of the scrapper after scraping.</returns>
        public static Dictionary <string, string> GetMaxTemperature()
        {
            Scrapper source = new Scrapper();
            source.Scrap();

            Dictionary <string, string> maxTemperatures = source.TemperaturaMaxima;
            return(maxTemperatures);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Scrapes with a tweet search capped at 2000 tweets and checks the result does not
        /// exceed (MaxCount - 1) + CountPerQuery.
        /// </summary>
        public void Test100Query()
        {
            // Arrange
            var queryInfo = new TweetSearchQuery("나이키", maxTweetCount: 2000);
            Scrapper tweetScrapper = new Scrapper(_token, queryInfo);

            // Act
            var tweets = tweetScrapper.Scrap();

            // Assert
            var tweetCount = tweets.Count();
            var upperBound = (queryInfo.MaxCount - 1) + queryInfo.CountPerQuery;
            Assert.IsTrue(tweetCount <= upperBound);
        }
Exemplo n.º 14
0
 /// <summary>
 /// Sets the scraping interval and batch size, resolves the vehicle repository from a new
 /// service scope, and creates the two scrapers.
 /// </summary>
 /// <param name="factory">Factory used to create the scope the repository is resolved from.</param>
 public CarScrapperSwissCars(IServiceScopeFactory factory)
 {
     // 2,850,000 ms = 47.5 minutes.
     TimeFrequency       = 2850000;
     VehiclesToAddAtOnce = 20;

     // NOTE(review): the scope is not disposed anywhere in this constructor.
     var scope = factory.CreateScope();
     Repo = scope.ServiceProvider.GetRequiredService <IRepositoryVehicle>();

     WebScrp           = new Scrapper();
     this.WebScrpSwiss = new WebScrapperSwissCars();
 }
Exemplo n.º 15
0
        // GET api/values
        /// <summary>
        /// Runs the sample query through the scrapper and wraps the itineraries in a response envelope.
        /// </summary>
        /// <returns>The response built by <c>Response&lt;List&lt;Itinerary&gt;&gt;.Create</c> from the scraped itineraries.</returns>
        public async Task <Response <List <Itinerary> > > Get()
        {
            ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

            // A CancellationTokenSource created with a timeout owns a timer, so it is
            // disposed as soon as the scrape has finished (or failed).
            using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
            {
                var itineraries = await Scrapper.Create().GetItinerariesAsync(Query.GetSampleQuery(), cts.Token);

                return(Response <List <Itinerary> > .Create(itineraries, true));
            }
        }
Exemplo n.º 16
0
 // POST api/values
 /// <summary>
 /// Fetches flight data for the posted query and wraps it in a response envelope.
 /// </summary>
 /// <param name="query">The query bound from the request body; may be null.</param>
 /// <returns>An "Invalid Request" response when <paramref name="query"/> is null, otherwise the flight data response.</returns>
 public Response <Routes> Post([FromBody] Query query)
 {
     if (query != null)
     {
         var routes = Scrapper.Create().GetFlightData(query);

         return(Response <Routes> .Create(routes, true));
     }

     return(Response <Routes> .Create("Invalid Request", null, false));
 }
Exemplo n.º 17
0
        /// <summary>
        /// Checks that the full-page review fixture reports 86 comments.
        /// </summary>
        public void CheckGetNumberOfComment()
        {
            // Arrange
            string fixtureHtml = File.ReadAllText("HtmlData/FullPageOfReview.html");
            DataHawk.TechTest.Scrapping.Scrapper parser = new Scrapper();

            // Act
            int commentCount = parser.GetNbComments(fixtureHtml);

            // Assert
            Check.That(commentCount).Equals(86);
        }
        /// <summary>
        /// Checks that the review in the unverified-purchase fixture is extracted with
        /// <c>VerifiedPurchase</c> set to false.
        /// </summary>
        public void CheckUnverifiedPurchase()
        {
            // Arrange
            string fixtureHtml = File.ReadAllText("HtmlData/ReviewFromUnverifiedPurchase.html");
            DataHawk.TechTest.Scrapping.Scrapper parser = new Scrapper();

            // Act
            Review extracted = parser.ExtractComment(fixtureHtml);

            // Assert
            Check.That(extracted.VerifiedPurchase).Equals(false);
        }
        /// <summary>
        /// Checks that two comment elements are found in the two-comment fixture.
        /// </summary>
        public void CheckThat2CommentsOnHtml()
        {
            // Arrange
            string fixtureHtml = File.ReadAllText("HtmlData/TwoComments.html");
            DataHawk.TechTest.Scrapping.Scrapper parser = new Scrapper();

            // Act
            List <IElement> commentNodes = parser.GetListOfHtmlComment(fixtureHtml);

            // Assert
            Check.That(commentNodes).HasSize(2);
        }
        /// <summary>
        /// Checks that ten comment elements are found in the full-page review fixture.
        /// </summary>
        public void CheckFullPageOfReviewNbReview()
        {
            // Arrange
            string fixtureHtml = File.ReadAllText("HtmlData/FullPageOfReview.html");
            DataHawk.TechTest.Scrapping.Scrapper parser = new Scrapper();

            // Act
            List <IElement> commentNodes = parser.GetListOfHtmlComment(fixtureHtml);

            // Assert
            Check.That(commentNodes).HasSize(10);
        }
Exemplo n.º 21
0
        // POST api/values
        /// <summary>
        /// Runs the posted query through the scrapper and wraps the flight data in a response envelope.
        /// </summary>
        /// <param name="query">The query bound from the request body; may be null.</param>
        /// <returns>An "Invalid Request" response when <paramref name="query"/> is null, otherwise the "success" response.</returns>
        public async Task <Response <Roots> > Post([FromBody] Query query)
        {
            if (query == null)
            {
                return(Response <Roots> .Create("Invalid Request", null, false));
            }

            ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

            // A CancellationTokenSource created with a timeout owns a timer, so it is
            // disposed as soon as the scrape has finished (or failed).
            using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
            {
                var flights = await Scrapper.Create().GetFlightsDataAsync(query.ToString(), cts.Token);

                return(Response <Roots> .Create("success", flights, true));
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Scrapes with a default tweet search query and checks the result does not exceed
        /// (MaxCount - 1) + CountPerQuery.
        /// </summary>
        public void TestDefaultQuery()
        {
            // Arrange
            TweetSearchQuery queryInfo = new TweetSearchQuery("나이키");
            Scrapper tweetScrapper     = new Scrapper(_token, queryInfo);

            // Act
            var tweets = tweetScrapper.Scrap();

            // Assert
            // Maximum count: (MaxScrapTweetCount - 1) + TweetCountPerPage
            var tweetCount = tweets.Count();
            var upperBound = (queryInfo.MaxCount - 1) + queryInfo.CountPerQuery;
            Assert.IsTrue(tweetCount <= upperBound);
        }
Exemplo n.º 23
0
        /// <summary>
        /// Creates the scrapper, subscribes to its notifications, styles the console and
        /// dispatches the parsed command-line options to <c>Run</c> or <c>HandleParseError</c>.
        /// </summary>
        public ScrapperController()
        {
            Scrapper           = new Scrapper();
            Scrapper.Notifier += scrapper_Notifier;

            StyleConsole();

            // Parse first, then route the outcome: Run on success, HandleParseError otherwise.
            var parseResult = Parser.Default.ParseArguments <Options>(Program.Arguments);

            parseResult
            .WithParsed(Run)
            .WithNotParsed(HandleParseError);
        }
Exemplo n.º 24
0
        /// <summary>
        /// Click handler: requests hi-res maps for the entries of <c>todos2001.dbf</c> and
        /// reports "Listo" on the console and in a message box.
        /// </summary>
        /// <param name="sender">Event source; not read.</param>
        /// <param name="e">Event data; not read.</param>
        private void btnImprove_Click(object sender, EventArgs e)
        {
            const string doneMessage = "Listo";

            var mapScrapper = new Scrapper();
            LoadSkipRadios(mapScrapper);

            string dbfPath = Path.Combine(Context.InputDataDirectory, "todos2001.dbf");
            mapScrapper.GetHiResMapsFromDbfList(dbfPath, false);

            Console.WriteLine(doneMessage);
            MessageBox.Show(this, doneMessage);
        }
Exemplo n.º 25
0
        // POST api/values
        /// <summary>
        /// Runs the posted query through the scrapper and wraps the itineraries in a response envelope.
        /// </summary>
        /// <param name="query">The query bound from the request body; may be null.</param>
        /// <returns>An "Invalid Request" response when <paramref name="query"/> is null, otherwise the itineraries response.</returns>
        public async Task <Response <List <Itinerary> > > Post([FromBody] Query query)
        {
            if (query == null)
            {
                return(Response <List <Itinerary> > .Create("Invalid Request"));
            }

            ScrapR.Models.WebBrowserExtensions.SetFeatureBrowserEmulation();

            // A CancellationTokenSource created with a timeout owns a timer, so it is
            // disposed as soon as the scrape has finished (or failed).
            using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
            {
                var itineraries = await Scrapper.Create().GetItinerariesAsync(query, cts.Token);

                return(Response <List <Itinerary> > .Create(itineraries, true));
            }
        }
Exemplo n.º 26
0
        /// <summary>
        /// Runs a user-timeline scrape for the screen name "twitterapi".
        /// The test currently has no assertion; the upper-bound check below is disabled.
        /// </summary>
        public void TestDefaultQuery()
        {
            // Arrange
            var queryInfo = new UserTimelineQuery();
            queryInfo.ScreenName = "twitterapi";

            Scrapper timelineScrapper = new Scrapper(_token, queryInfo);

            // Act
            var tweets = timelineScrapper.Scrap();

            // Maximum count: (MaxScrapTweetCount - 1) + TweetCountPerPage
            //Assert.IsTrue(tweets.Count() <= (queryInfo.MaxTweetCount - 1) + queryInfo.TweetCountPerPage);
        }
Exemplo n.º 27
0
        /// <summary>
        /// Configures log4net from <c>configLoggingFileName</c>, builds a scrapers manager for
        /// <c>Person</c> data and awaits its scraping run.
        /// </summary>
        /// <param name="handler">Data handler passed to the scrapers manager.</param>
        /// <param name="inputData">Input data provider passed to the scrapers manager.</param>
        private static async Task RunScrappingAsync(IDataHandler <Person> handler, InputDataProvider inputData)
        {
            // Logging setup.
            var repository        = LogManager.GetRepository(Assembly.GetEntryAssembly());
            var loggingConfigFile = new FileInfo(configLoggingFileName);
            XmlConfigurator.Configure(repository, loggingConfigFile);

            // NOTE(review): inside an async method GetCurrentMethod() reports the compiler-generated
            // state machine method, so the logger name may not be the enclosing class - confirm.
            var  loggerOwner = MethodBase.GetCurrentMethod().DeclaringType;
            ILog log         = LogManager.GetLogger(loggerOwner);

            // Scraping pipeline.
            IScrapper <Person> personScrapper = new Scrapper <Person>();
            IScrapersManager   manager        = new ScrapersManager(inputData, handler, personScrapper, log);
            manager.Notify += ScrapperManagerNotify;

            await manager.ScrapDataAsync();
        }
Exemplo n.º 28
0
        /// <summary>
        /// Feeds the landing and booking page fixtures to a fake downloader, downloads the
        /// performances and checks that some are returned and the fake is back in its initial state.
        /// </summary>
        public async Task TestWorkflow()
        {
            // Arrange
            const string bookingPageUrl = "https://harrypottertheplay.nimaxtheatres.com/hpcc/WEBPAGES/EntaWebGateway/gateway.aspx?E=N&QL=S2728|RCAR1|VPAL|G~/WEBPAGES/EntaWebShow/ShowPerformance.aspx";

            var fakeDownloader = new PageDownloaderFake();

            string landingPageHtml = File.ReadAllText(@"Resources\LandingPage.html");
            fakeDownloader.SetPage(Scrapper.LandingPageUrl, landingPageHtml);

            string bookingPageHtml = File.ReadAllText(@"Resources\BookingPage.html");
            fakeDownloader.SetPage(bookingPageUrl, bookingPageHtml);

            var sut = new Scrapper(fakeDownloader);

            // Act
            var performances = await sut.DownloadPerformances();

            // Assert
            Assert.NotEmpty(performances);
            Assert.True(fakeDownloader.IsInInitialState);
        }
Exemplo n.º 29
0
        /// <summary>
        /// Test fixture setup: creates the mocks, stubs the HTTP client and node parser with
        /// fixed values, and builds the scrapper under test from them.
        /// </summary>
        public ScrapperTests()
        {
            _configMock     = new Mock <IConfiguration>();
            _httpClientMock = new Mock <IHttpClientProvider>();
            _nodeParserMock = new Mock <INodeProcessor>();
            _areaFinder     = new Mock <IGeoAreaFinder>();

            // Every document request yields the same placeholder HTML.
            _httpClientMock
            .Setup(client => client.GetHtmlDocumentWithProxy(It.IsAny <string>()))
            .Returns("some_html_document");

            // The node parser returns fixed values regardless of its input.
            _nodeParserMock
            .Setup(parser => parser.ExtractMmsiFromHtml(It.IsAny <string>()))
            .Returns(11111111);
            _nodeParserMock
            .Setup(parser => parser.ExtractLatFromHtml(It.IsAny <string>()))
            .Returns(11.111);
            _nodeParserMock
            .Setup(parser => parser.ExtractLonFromHtml(It.IsAny <string>()))
            .Returns(12.112);
            _nodeParserMock
            .Setup(parser => parser.ExtractAisUpdateTimeFromHtml(It.IsAny <string>(), It.IsAny <string>()))
            .Returns(new DateTime(2020, 1, 1));

            _service = new Scrapper(_configMock.Object, _httpClientMock.Object, _nodeParserMock.Object, _areaFinder.Object);
        }
Exemplo n.º 30
0
        /// <summary>
        /// Click handler: copies the four filter text boxes onto a new scrapper, requests the maps
        /// for the entries of <c>todos2001.dbf</c> and reports "Listo" on the console and in a message box.
        /// </summary>
        /// <param name="sender">Event source; not read.</param>
        /// <param name="e">Event data; not read.</param>
        private void btnGet_Click(object sender, EventArgs e)
        {
            const string doneMessage = "Listo";

            var mapScrapper = new Scrapper
            {
                ProvFilter  = txtProv.Text,
                DptoFilter  = txtDepto.Text,
                FracFilter  = txtFrac.Text,
                RadioFilter = txtRadio.Text
            };

            //LoadSkipRadios(scrapper);
            string dbfPath = Path.Combine(Context.InputDataDirectory, "todos2001.dbf");
            mapScrapper.GetMapsFromDbfList(dbfPath);

            Console.WriteLine(doneMessage);
            MessageBox.Show(this, doneMessage);
        }