Example #1
        private async void btnStart_Click(object sender, EventArgs e)
        {
            pbMain.Value = 0;
            ToggleGroupBoxes(false);

            TitleBuilder.Build(this, "Initializing");

            try
            {
                var threadInfo = new ThreadInfo(txtThread.Text);

                var scraper = new Scraper(threadInfo, UpdateProgress);
                var files = await scraper.CollectFilePosts(cbWEBM.Checked, cbDuplicates.Checked);

                pbMain.Maximum = files.Count;

                await Task.WhenAll(files.Select(p =>
                    scraper.DownloadAsync(p, rbUID.Checked, txtPath.Text, this)));

                TitleBuilder.Build(this, "Completed", false);
            }
            catch (Exception ex)
            {
                TitleBuilder.Build(this, ex.Message, false);
            }

            ToggleGroupBoxes(true);
        }
Example #2
 private static string GetPeriodData(ref Scraper.Scraper scraper, string userName, string period)
 {
     var url = String.Format("http://runkeeper.com/activitiesByDateRange?userName={0}&startDate={1}", userName, period);
     var lastContent = scraper.PerformRequest(url, null);
     return lastContent;
 }
Example #3
 private static string Login(ref Scraper.Scraper scraper, string username, string password)
 {
     var lastContent = scraper.PerformRequest(Constants.RunkeeperConstants.URL_START);
     var postdata = "_eventName=submit&email=" + username + "&password=" + password;
     lastContent = scraper.PerformRequest(Constants.RunkeeperConstants.URL_LOGIN, postdata);
     return lastContent;
 }
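Together with the EnterHome helper further down this listing, the two Runkeeper helpers above sketch a full session: log in, land on the home page, then pull activities for a date range. A minimal composition under that reading; the credentials and the period value are placeholders, not values from the original source:

 // Hypothetical composition of the helpers above; the credentials and the
 // period format are assumptions.
 var scraper = new Scraper.Scraper();
 Login(ref scraper, "user@example.com", "hunter2");
 EnterHome(ref scraper);
 var activities = GetPeriodData(ref scraper, "user@example.com", "2014-01-01");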
Example #4
        private static void Main(string[] args)
        {
            _boligScraper = new Scraper();

            _userPreference = GetUserPreference();
            _boligPortalRequest = new BoligPortalRequest
                                      {
                                          Amt = ((int) _userPreference.Region).ToString(),
                                          RentMin = "0",
                                          RentMax = _userPreference.RentMax,
                                          ZipCodes = _userPreference.ZipCodes,
                                          ApartmentType = _userPreference.ApartmentTypes,
                                          RentLength = new List<string> {"4"},
                                          Page = "1",
                                          Limit = "15",
                                          SortCol = "3",
                                          SortDesc = "1"
                                      };

            Console.WriteLine("{0} :: Creating infinite loop\n", DateTime.Now);

            // infinite loop
            while (true)
            {
                Tick();

                Thread.Sleep(120000); // 2 minutes
            }
        }
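Thread.Sleep parks the console's main thread between polls, which is fine for this tool; if the loop ever needs graceful shutdown or async work inside Tick, a Task-based loop is the usual next step. A sketch under that assumption (RunLoopAsync is a hypothetical name, not part of the original program):

        // Hypothetical async polling loop; cancellation support is an addition.
        private static async Task RunLoopAsync(CancellationToken token)
        {
            while (!token.IsCancellationRequested)
            {
                Tick();

                await Task.Delay(TimeSpan.FromMinutes(2), token); // 2 minutes
            }
        }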
Example #5
        private async void btnStart_Click(object sender, EventArgs e)
        {
            pbMain.Value = 0;

            var parser = new Parser(txtThread.Text);
            if (!parser.IsValid())
                return;

            ToggleGroupBoxes(false);
            TitleBuilder.Build(this, "Initializing");

            var threadData = await parser.BuildThreadData();
            var scraper = new Scraper(threadData, UpdateProgress);

            var files = await scraper.CollectFileURLs(cbWEBM.Checked, cbDuplicates.Checked);
            if (files == null)
                return;

            pbMain.Maximum = files.Count;

            await Task.WhenAll(files.Select(p => scraper.DownloadFileAsync(this, p, rbUID.Checked, txtPath.Text)));

            ToggleGroupBoxes(true);
            TitleBuilder.Build(this, "Completed", false);
        }
Example #6
        public void SetUp()
        {
            _boligScraper = new Scraper();

            // TODO: Add mock data for being able to test in offline mode
            // e.g. a new local .json file
        }
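The TODO above asks for offline test data; one way to honor it is to read a checked-in JSON fixture and feed it to the scraper. A sketch of that idea, where LoadMockResponse and the fixture path are hypothetical names, since the real Scraper API is not shown in these examples:

        // Hypothetical offline setup; LoadMockResponse and the fixture path are
        // assumed names, not part of the real Scraper API.
        public void SetUpOffline()
        {
            _boligScraper = new Scraper();

            var mockJson = File.ReadAllText("Fixtures/boligportal_listings.json");
            _boligScraper.LoadMockResponse(mockJson);
        }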
Example #7
 public void TestFixtureSetUp()
 {
     list = new List<HtmlDoc>();
     var httpClient = new HttpClient { UserAgentName = "goodbot" };
     var scraper = new Scraper(httpClient, new ScrapedUrisDictionary());
     scraper.Subscribe(new ConsoleWriterObserver());
     scraper.Subscribe(list.Add);
     scraper.Scrape(new Uri("http://localhost:12345")).Wait();
 }
Example #8
 public void Then_images_should_be_saved()
 {
     var scraper = new Scraper();
     var io = new ImageScraperObserver(new FileWriter(new DirectoryInfo("c:\\temp")));
     scraper.Subscribe(io);
     scraper.Subscribe(new ConsoleWriterObserver());
     //scraper.Subscribe(x => Console.WriteLine(x.Uri));
     scraper.Scrape(new Uri("http://www.cambridgecupcakes.com/"));
 }
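The two fixtures above pass both observer objects (ConsoleWriterObserver, ImageScraperObserver) and a bare delegate (list.Add) to Subscribe, which implies at least two overloads. A sketch of the assumed surface; the interface and member names are guesses, not the library's real declarations:

 // Assumed shape of the Subscribe API implied by the tests above.
 public interface IScrapeObserver
 {
     void OnScraped(HtmlDoc doc);
 }

 public partial class Scraper
 {
     private readonly List<Action<HtmlDoc>> _subscribers = new List<Action<HtmlDoc>>();

     public void Subscribe(IScrapeObserver observer) => Subscribe(observer.OnScraped);
     public void Subscribe(Action<HtmlDoc> callback) => _subscribers.Add(callback);
 }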
Example #9
        private void Scrape_Click(object sender, EventArgs e)
        {
            var url = urlBox.Text;

            AddLog(string.Format("Scrapes url:'{0}'", url));
            var scraper = new Scraper();
            var lastContent = scraper.PerformRequest(url);
            AddLog(string.Format("Scraped:'{0}', got:",url));
            AddLog(lastContent);
        }
Example #10
 public void Then_no_results_should_be_returned()
 {
     var list = new List<HtmlDoc>();
     var httpClient = new HttpClient { UserAgentName = "badbot" };
     var scraper = new Scraper(httpClient, new ScrapedUrisDictionary());
     scraper.Subscribe(new ConsoleWriterObserver());
     scraper.Subscribe(list.Add);
     scraper.Scrape(new Uri("http://localhost:12345")).Wait();
     list.Should().BeEmpty();
 }
Example #11
        private async Task FetchRandomThumbs()
        {
            if (!Scraper.IsInitiated)
            {
                Scraper.InstanciateAllDerivedTypes();
            }
            var tasks = Scraper.InstanciatedScrapers.Select(FetchDataAsync).ToList();

            // wait for all tasks to complete before shuffling the stack
            foreach (var result in await Task.WhenAll(tasks).ConfigureAwait(false))
            {
                if (result == null)
                {
                    continue;                 //skip this scraper
                }
                foreach (var item in result)
                {
                    _pictureDataStack.Push(item);
                }
            }
            _pictureDataStack.Shuffle();
            RaiseDownloadRandomPagesCompleted();
        }
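_pictureDataStack.Shuffle() is an extension that is not shown in these examples; a standard Fisher-Yates shuffle would fit the call. A sketch, assuming the stack's contents are in (or copied into) a list-like collection:

        // Hypothetical Fisher-Yates shuffle; the original extension is not shown.
        public static class ShuffleExtensions
        {
            private static readonly Random Rng = new Random();

            public static void Shuffle<T>(this IList<T> list)
            {
                for (int i = list.Count - 1; i > 0; i--)
                {
                    int j = Rng.Next(i + 1);
                    (list[i], list[j]) = (list[j], list[i]);
                }
            }
        }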
Example #12
        private void OpenBagHook_CL(Action<Player> callback, Player plr)
        {
            Action<ScrapedSentData> listener = (d) => {
                int  itemWho = d.Number;
                Item item    = Main.item[itemWho];
                if (item?.active != true)
                {
                    return;
                }

                this.ProcessBagItem(plr, item);
            };

            //

            Scraper.IsScrapingSentData = true;
            Scraper.AddSendDataListener(listener);

            callback.Invoke(plr);

            Scraper.IsScrapingSentData = false;
            Scraper.RemoveSendDataListener(listener);
        }
Example #13
        public SchedulesController()
        {
            var systemTime     = new SystemTime();
            var httpClient     = new HttpClient();
            var client         = new Client(httpClient);
            var scraper        = new Scraper(systemTime, client);
            var serializer     = new ScrapeResultSerializer();
            var pathBuilder    = new PathBuilder();
            var storageClient  = new StorageClient(systemTime, pathBuilder);
            var uniqueClient   = new UniqueClient(storageClient);
            var statusRecorder = new UploadStatusRecorder(storageClient, systemTime);

            _scrapeResultRecorder          = new ScrapeResultRecorder(scraper, serializer, storageClient, uniqueClient, statusRecorder);
            _throttledScrapeResultRecorder = new ThrottledScrapeResultRecorder(systemTime, _scrapeResultRecorder);
            var gtfsConverter      = new GtfsConverter();
            var gtfsCsvSerializer  = new GtfsCsvSerializer();
            var gtfsFeedSerializer = new GtfsFeedSerializer(gtfsCsvSerializer);

            _gtfsFeedArchiveRecord = new GtfsFeedArchiveRecorder(storageClient, uniqueClient, gtfsConverter, gtfsFeedSerializer, statusRecorder);
            var settingsService = new SettingsProvider();

            _settings = new Settings(settingsService);
        }
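The constructor composes the whole dependency graph by hand. Example #17 below already uses Microsoft.Extensions.DependencyInjection, so the same graph could be container-registered instead; a partial sketch, with singleton lifetimes as an assumption:

            // Hypothetical container registration mirroring the manual wiring above.
            var services = new ServiceCollection();
            services.AddSingleton<SystemTime>();
            services.AddSingleton<HttpClient>();
            services.AddSingleton<Client>();
            services.AddSingleton<Scraper>();
            services.AddSingleton<PathBuilder>();
            services.AddSingleton<StorageClient>();
            // ...remaining collaborators follow the same pattern.
            var provider = services.BuildServiceProvider();
            var scraper = provider.GetRequiredService<Scraper>();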
Example #14
        public void ShouldReturnTrueUponCreatingCsv()
        {
            Scraper scraper = new Scraper();

            var eventList = new List<Event>
            {
                new Event
                {
                    ArtistorEvent = "Billy Soomro's Interview",
                    City          = "London",
                    Venue         = "SongKick",
                    Time          = "12:30pm",
                    Price         = "Not specified",
                    Availability  = "Available",
                    Date          = "Fri 8th Dec 2017",
                    SpecialGuests = "Bring Me The Horizon"
                }
            };

            var result = scraper.ExportCSV(eventList);

            Assert.AreEqual(true, result);
        }
Example #15
        private void runkeeperTest_Click(object sender, EventArgs e)
        {
            var scraper = new Scraper();

            var urlStart = "https://runkeeper.com";
            AddLog(string.Format("Scrapes url:'{0}'", urlStart));
            var lastContent = scraper.PerformRequest(urlStart);
            AddLog(string.Format("Scraped:'{0}', got:", urlStart));
            AddLog(lastContent);

            var urlLogin = "******"; // login URL redacted in the original source
            AddLog(string.Format("Scrapes url:'{0}'", urlLogin));
            var postdata = "_eventName=submit&email=" + userNameBox.Text + "&password=" + passwordBox.Text; // password control name assumed; value redacted in the original
            lastContent = scraper.PerformRequest(urlLogin, postdata);
            AddLog(string.Format("Scraped:'{0}', got:", urlLogin));
            AddLog(lastContent);

            var urlInloggad = "http://runkeeper.com/home";
            AddLog(string.Format("Scrapes url:'{0}'", urlInloggad));
            lastContent = scraper.PerformRequest(urlInloggad, null);
            AddLog(string.Format("Scraped:'{0}', got:", urlInloggad));
            AddLog(lastContent);
        }
Example #16
        public async Task GetGamesForAPlayerWithNullPointsCell()
        {
            var scraper = new Scraper(_transparentUserAgent);
            var player  = new Player
            {
                ID          = "heanebr01",
                FeedUrl     = "https://www.basketball-reference.com/players/h/",
                Name        = "Brian Heaney",
                FirstSeason = 1970,
                LastSeason  = 1970,
                BirthDate   = new DateTime(1946, 9, 3).AsUtc()
            };

            var games = await scraper.GetGames(player, 1970);

            var regularSeasonGames = games.Where(g => !g.IsPlayoffGame);
            var playoffGames       = games.Where(g => g.IsPlayoffGame);

            Assert.AreEqual(14, regularSeasonGames.Count());
            Assert.AreEqual(6, playoffGames.Count());
            Assert.AreEqual(28, regularSeasonGames.Sum(g => g.Points));
            Assert.AreEqual(0, playoffGames.Sum(g => g.Points));
        }
Example #17
        static void Main(string[] args)
        {
            var builder = new ConfigurationBuilder()
                          .SetBasePath(Directory.GetCurrentDirectory())
                          .AddJsonFile("appsettings.json", optional: true, reloadOnChange: true);

            IConfigurationRoot configuration = builder.Build();

            var services = new ServiceCollection();

            services.AddDbContext<DatabaseContext>(options =>
                options.UseMySql(configuration["MySQLConnectionString"]));

            var serviceProvider = services.BuildServiceProvider();

            var _context = serviceProvider.GetService<DatabaseContext>();

            var scraper = new Scraper(_context);

            scraper.ScrapeMainList("https://psxdatacenter.com/ulist.html");
            scraper.ScrapeMainList("https://psxdatacenter.com/plist.html");
            scraper.ScrapeMainList("https://psxdatacenter.com/jlist.html");
        }
Example #18
        public async Task GetGamesForAPlayerWhoPlayedInADoubleheader()
        {
            var scraper = new Scraper(_transparentUserAgent);
            var player  = new Player
            {
                ID          = "bemorir01",
                FeedUrl     = "https://www.basketball-reference.com/players/b/",
                Name        = "Irv Bemoras",
                FirstSeason = 1954,
                LastSeason  = 1957,
                BirthDate   = new DateTime(1930, 11, 18).AsUtc()
            };
            var games = await scraper.GetGames(player, 1954);

            Assert.AreEqual(68, games.Count);
            Assert.AreEqual(68, games.Select(g => g.Date).Distinct().Count());
            Assert.AreEqual(67, games.Select(g => g.Date.Date).Distinct().Count());
            Assert.AreEqual("bemorir01 3/8/1954", games[64].ID);
            Assert.AreEqual("bemorir01 3/8/1954 2", games[65].ID);
            Assert.AreEqual(9, games[64].Points);
            Assert.AreEqual(13, games[65].Points);
            Assert.AreEqual(games[64].Date.AddHours(3), games[65].Date);
            Assert.AreEqual(506, games.Sum(g => g.Points));
        }
Example #19
        public async Task GetGamesForAPlayerWhoPlayedOnTwoDifferentTeamsInTheSameDay()
        {
            var scraper = new Scraper(_transparentUserAgent);
            var player  = new Player
            {
                ID          = "johnsne01",
                FeedUrl     = "https://www.basketball-reference.com/players/j/",
                Name        = "Neil Johnston",
                FirstSeason = 1952,
                LastSeason  = 1959,
                BirthDate   = new DateTime(1929, 2, 4).AsUtc()
            };

            var games = await scraper.GetGames(player, 1952);

            var regularSeasonGames = games.Where(g => !g.IsPlayoffGame);
            var playoffGames       = games.Where(g => g.IsPlayoffGame);

            Assert.AreEqual(65, regularSeasonGames.Count());
            Assert.AreEqual(3, playoffGames.Count());
            Assert.AreEqual(64, regularSeasonGames.Where(g => g.Team == "PHW").Count());
            Assert.AreEqual(1, regularSeasonGames.Where(g => g.Team == "SYR").Count());
            Assert.AreEqual(3, playoffGames.Where(g => g.Team == "PHW").Count());
        }
Example #20
        public async Task Run()
        {
            var oldRequestResult = await Scraper.GetEntries();

            do
            {
                var newRequestResult = await Scraper.GetEntries();

                var newEntries = GetNewEntries(oldRequestResult, newRequestResult);

                foreach (var entry in newEntries)
                {
                    await TelegramClient
                    .SendMessage(new MessageRequest
                    {
                        Text = $"{entry.Title} - {entry.Price} - {entry.Time}"
                    });
                }

                oldRequestResult = newRequestResult;

                await Task.Delay(10000);
            } while (true);
        }
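GetNewEntries is not shown; given the fields used to format the Telegram message, a minimal sketch that keys entries on Title plus Time (the Entry type and that key choice are assumptions):

        // Hypothetical diff between two polls; the Entry shape and the identity
        // key (Title + Time) are assumptions.
        private static List<Entry> GetNewEntries(IEnumerable<Entry> oldEntries, IEnumerable<Entry> newEntries)
        {
            var seen = new HashSet<string>(oldEntries.Select(e => e.Title + "|" + e.Time));

            return newEntries
                .Where(e => !seen.Contains(e.Title + "|" + e.Time))
                .ToList();
        }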
Example #21
        private void btnSearch_Click(object sender, EventArgs e)
        {
            string keywords = txtSearchBox.Text.Trim();

            try
            {
                if (keywords.Length > 0)
                {
                    lviewResults.Items.Clear();
                    _scraper   = new Scraper("vuighe.net");
                    _listItems = _scraper.GetSearchResults(keywords);
                    GenerateItemsListView();
                }
                else
                {
                    MessageBox.Show(@"Please input the keywords");
                }
            }
            catch (ArgumentNullException ex)
            {
                MessageBox.Show(@"Cannot find result with keyword '" + keywords + "'");
//                MessageBox.Show(ex.StackTrace);
            }
        }
Example #22
        #pragma warning disable 1998
        public async override global::System.Threading.Tasks.Task ExecuteAsync()
        {
#line 1 "C:\Users\t-depra\Desktop\ScraperTrial\ScraperTrial\Views\Home\Index.cshtml"

            ViewData["Title"] = "Home Page";
            var scrape = new Scraper();
            // var file = new ScraperFileStore();

#line default
#line hidden
            BeginContext(121, 2, true);
            WriteLiteral("\r\n");
            EndContext();
            BeginContext(124, 17, false);
#line 7 "C:\Users\t-depra\Desktop\ScraperTrial\ScraperTrial\Views\Home\Index.cshtml"
            Write(scrape.Scraping());

#line default
#line hidden
            EndContext();
            BeginContext(141, 2, true);
            WriteLiteral("\r\n");
            EndContext();
        }
Example #23
        static async Task ListCharacters(WeasylClient client)
        {
            var user = await client.WhoamiAsync();

            Console.WriteLine(user.login);

            Console.WriteLine("----------");
            var charids = await Scraper.GetCharacterIdsAsync(user.login);

            foreach (int id in charids)
            {
                Console.WriteLine(id);
            }

            Console.WriteLine("----------");
            foreach (int id in charids.Take(3))
            {
                var details = await client.GetCharacterAsync(id);

                Console.WriteLine(details.title);
                Console.WriteLine("Species: " + details.species);
                Console.WriteLine();
            }
        }
Example #24
        public override List<Dish> ReadWeeklyMenu()
        {
            var dishes = new List<Dish>();

            var html     = Scraper.ScrapeWebPage(Restaurant.KalasetPåFyran.Url);
            var cq       = new CQ(html);
            var menuDate = DateHelper.MondayThisWeek();

            var lunchMenuTags = cq["#main-content p > strong, #main-content li"];

            if (lunchMenuTags == null)
            {
                return dishes;
            }

            foreach (var tag in lunchMenuTags)
            {
                if (tag.NodeName.Equals("strong", StringComparison.OrdinalIgnoreCase))
                {
                    menuDate = ParseWeekDay(WebUtility.HtmlDecode(tag.InnerText).Trim());
                    continue;
                }

                var description = WebUtility.HtmlDecode(tag.InnerText).Trim();

                /*if (WebUtility.HtmlDecode(description).Trim().Equals("(v) – finns som vegeteriskt alternativ", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }*/

                var dish = new Dish(description, menuDate, Restaurant.KalasetPåFyran.Id);
                dishes.Add(dish);
            }

            return dishes;
        }
Example #25
        public override List<Dish> ReadWeeklyMenu()
        {
            var dishes = new List<Dish>();

            var html     = Scraper.ScrapeWebPage(Restaurant.Aihaya.Url);
            var cq       = new CQ(html);
            var menuDate = DateHelper.MondayThisWeek();

            var lunchMenuTags = cq[".lunch_menu .menu_header, .lunch_menu .td_title"];

            if (lunchMenuTags == null)
            {
                return dishes;
            }

            foreach (var tag in lunchMenuTags)
            {
                if (tag.HasClass("menu_header"))
                {
                    menuDate = ParseWeekDay(WebUtility.HtmlDecode(tag.InnerText).Trim());
                    continue;
                }

                var description = tag.InnerText;

                if (WebUtility.HtmlDecode(description).Trim().Equals("(v) – finns som vegeteriskt alternativ", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                var dish = new Dish(description, menuDate, Restaurant.Aihaya.Id);
                dishes.Add(dish);
            }

            return dishes;
        }
Example #26
        public static DivisaModel GetRofexModel()
        {
            var divisas = new List<DivisaViewModel>();

            HtmlNode html = new Scraper(new Uri("http://www.rofex.com.ar/"), Encoding.UTF7).GetNodes();

            var cierre = html.CssSelect("#cierre_monedas");
            var tabla  = cierre.CssSelect("table tr").Skip(1);

            foreach (var htmlNode in tabla)
            {
                var tds = htmlNode.CssSelect("td").ToArray();

                var nombre    = tds[0];
                var compra    = tds[1];
                var venta     = tds[2];
                var variacion = tds[3];

                divisas.Add(new DivisaViewModel
                {
                    Nombre      = nombre.InnerText,
                    Simbolo     = "U$S",
                    ValorCompra = compra.InnerText.Remove(compra.InnerText.Length - 1),
                    ValorVenta  = venta.InnerText.Remove(venta.InnerText.Length - 1),
                    Variacion   = variacion.InnerText,
                });
            }

            var result = new DivisaModel
            {
                Actualizacion = DateTime.Now,
                Divisas       = divisas,
            };

            return result;
        }
Example #27
        public string Update(string guid, [FromBody] Scraper s)
        {
            IScraper scraper = m_scraperService.GetScraper(guid);

            if (scraper == null)
            {
                var error = new LexicalAnalyzer.Models.Error();
                error.Message = "Could not find Scraper with the given GUID";
                return JsonConvert.SerializeObject(error);
            }
            if (s == null)
            {
                /* The JSON sent was not in the correct format */
                Response.StatusCode = 400;  /* Bad Request */
                var error = new LexicalAnalyzer.Models.Error();
                error.Message = "Invalid structure for Scraper object";
                return JsonConvert.SerializeObject(error);
            }
            scraper.Properties = s.Properties;
            if (s.Status.ToLower() == "started")
            {
                m_scraperService.StartScraper(guid);
            }
            else if (s.Status.ToLower() == "paused")
            {
                m_scraperService.PauseScraper(guid);
            }
            else
            {
                Response.StatusCode = 400;  /* Bad Request */
                var error = new LexicalAnalyzer.Models.Error();
                error.Message = "The only valid Scraper status values to set are 'started' or 'paused'";
                return JsonConvert.SerializeObject(error);
            }
            return JsonConvert.SerializeObject(scraper);
        }
Example #28
 private void FillData()
 {
     try
     {
         foreach (var data in _collection.FindAll())
         {
             //remove invalid data
             if (!CheckValid(data))
             {
                 DeleteById(data.Id);
                 continue;
             }
             _cache.Add(new PictureData(Scraper.GetScraperByName(Fussy.DecryptString(data.ScraperName)))
             {
                 ThumbUrl = Fussy.DecryptString(data.ThumbUrl),
                 PageUrl = Fussy.DecryptString(data.PageUrl)
             });
         }
     }
     catch (Exception ex)
     {
         ExManager.Ex(ex);
     }
 }
Example #29
 public void Init()
 {
     htmlDocument = HtmlDocumentFactory.FromPath(Constants.SampleFutureSyncFilePath);
     scraper      = new Scraper(htmlDocument);
 }
Example #30
        void _downloader_DownloadProgress(object sender, Scraper.Events.DownloadProgressEventArgs e)
        {
            ProgressValue = e.PercentComplete;
            CurrentActionText = e.Message;

            InvokePropertyChanged("ProgressValue");
            InvokePropertyChanged("CurrentActionText");
        }
Example #31
        private async Task SearchMobyGames(string term)
        {
            var scraper = new Scraper(Application.Current.ScraperWebClient());

            var entries = await Task.Run(() => scraper.Search(term));

            var disambiguationDialog = new GameDisambiguationDialog(entries)
            {
                Owner = editGameViewModel.ParentWindow
            };

            if (disambiguationDialog.ShowDialog() != true)
            {
                return;
            }

            var url = disambiguationDialog.SelectedResult.Url;

            if (disambiguationDialog.SelectedResult.Releases.Any())
            {
                // Find the first release that matches our preferred platforms.
                foreach (var platform in PlatformPriorities)
                {
                    var matchingRelease =
                        disambiguationDialog.SelectedResult.Releases
                        .FirstOrDefault(release => release.Platform.Equals(platform, StringComparison.OrdinalIgnoreCase));

                    if (matchingRelease == null || string.IsNullOrEmpty(matchingRelease.Url))
                    {
                        continue;
                    }

                    url = matchingRelease.Url;
                    break;
                }
            }

            var gameEntry = scraper.GetGame(url);

            GetSpecs(gameEntry);

            GetScreenshots(gameEntry);

            editGameViewModel.GameScreenshots.Clear();

            editGameViewModel.Title             = gameEntry.Name;
            editGameViewModel.GameMobyGamesSlug = gameEntry.Slug;
            editGameViewModel.GameLinks.Add(gameEntry.Url);

            var publisher = editGameViewModel.Publishers.ToList().Find(p => p.Slug == gameEntry.Publisher.Slug);

            if (publisher == null)
            {
                publisher = new Publisher
                {
                    Name  = gameEntry.Publisher.Name,
                    Slug  = gameEntry.Publisher.Slug,
                    Links = new List<string> {
                        gameEntry.Publisher.Url
                    }
                };

                editGameViewModel.Publishers.Add(publisher);
            }

            editGameViewModel.GamePublisher = publisher;

            var developerCollection = editGameViewModel.Developers.ToList();

            foreach (var devEntry in gameEntry.Developers)
            {
                var developer = developerCollection.Find(d => d.Slug == devEntry.Slug);

                if (developer == null)
                {
                    developer = new Developer
                    {
                        Name  = devEntry.Name,
                        Slug  = devEntry.Slug,
                        Links = new List<string> {
                            devEntry.Url
                        },
                    };

                    editGameViewModel.Developers.Add(developer);
                }

                editGameViewModel.GameDevelopers.Add(developer);
            }
        }
Example #32
 void rule_RemoveRule(object sender, Scraper.Notifier.Event.RemoveRuleEventArgs e)
 {
     e.Rule.RemoveRule -= new EventHandler<Scraper.Notifier.Event.RemoveRuleEventArgs>(rule_RemoveRule);
     Rules.Remove(e.Rule);
 }
Example #33
        protected Crawler(IDocumentFactory documentFactory, IKeyValueStore<string, Result> store, IKeyValueStore<string, FetchTarget> frontier)
        {
            _store = store;
            _frontier = frontier;

            var fetcherOptions = new FetcherOptions
            {
                UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
            };

            var parserOptions = new ParserOptions
            {
            };

            var scraperOptions = new ScraperOptions
            {
            };

            var extractorOptions = new ExtractorOptions
            {
            };

            //var storerOptions = new StorerOptions
            //{
            //};

            var builderOptions = new BuilderOptions
            {
            };

            var providerOptions = new ProviderOptions
            {
            };

            //var dispatcherOptions = new DispatcherOptions
            //{
            //};

            Fetcher = new Fetcher(fetcherOptions);
            Parser = new Parser(parserOptions, documentFactory);
            Scraper = new Scraper(scraperOptions);
            Extractor = new Extractor(extractorOptions);
            Storer = new Storer(store);
            Builder = new Builder(builderOptions);
            Provider = new Provider(providerOptions, store, frontier);
            Dispatcher = new Dispatcher();

            Fetcher.SendTo(Parser, x => x.StatusCode == System.Net.HttpStatusCode.OK);

            Parser.SendTo(Scraper);
            Parser.SendTo(Extractor);

            Fetcher.SendTo(Builder, x => x.StatusCode == System.Net.HttpStatusCode.OK);
            Scraper.SendTo(Builder);
            Extractor.SendTo(Builder);

            Builder.SendTo(Storer);

            //Storer.LinkTo(new ActionBlock<Result>(x =>
            //{
            //}));

            Builder.SendTo(Provider);
            Provider.SendTo(Dispatcher, x => x != null);
            Dispatcher.SendTo(Fetcher);
        }
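The commented-out LinkTo/ActionBlock lines hint that the pipeline stages sit on TPL Dataflow, which would make SendTo a thin wrapper over LinkTo with an optional filter. A sketch of that assumed wrapper; the real implementation is not shown in these examples:

        // Assumed SendTo helper, inferred from the LinkTo/ActionBlock hints above;
        // requires System.Threading.Tasks.Dataflow.
        public static class PipelineExtensions
        {
            public static void SendTo<T>(this ISourceBlock<T> source,
                                         ITargetBlock<T> target,
                                         Predicate<T> filter = null)
            {
                if (filter == null)
                    source.LinkTo(target);
                else
                    source.LinkTo(target, new DataflowLinkOptions(), filter);
            }
        }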
Example #34
 public ActionResult Scrape()
 {
     Scraper.RunScraper();
     return RedirectToAction("Recent");
 }
Example #35
        private static async Task Main(string[] args)
        {
            if (File.Exists(ConfigPath))
            {
                Xml.LoadConfig(ConfigPath);
            }

            string argInput;

            try
            {
                argInput = args[0];
            }
            catch { argInput = null; }

            string        dmPath;
            DirectoryInfo chatDir;

PathInput:
            Console.Clear();

            if (argInput == null)
            {
                Logger.Print("Path of files to be scraped: ", LogType.Info, false);
                dmPath = Console.ReadLine();
            }
            else
            {
                dmPath = argInput;
            }


            if (!Directory.Exists(dmPath))
            {
                Console.Clear();
                Logger.Print("Path was not found.", LogType.Error);
                Thread.Sleep(1000);
                goto PathInput;
            }
            else
            {
                chatDir = new DirectoryInfo(dmPath);
            }

            Console.Clear();

            Logger.Print("Press any key to start downloading...", LogType.Info);
            Console.Read();

            var downloader = new Downloader();

            _scraper = new Scraper(downloader, chatDir, dmPath);

            await _scraper.Execute();

            Console.WriteLine();

            Logger.Print($"{DateTime.Now} | Finished scraping and downloading all links and files!", LogType.Info);

            Console.ReadLine();
        }
Example #36
 public void Scraper_Ctor_Null_Args()
 {
     _ = new Scraper(null);
 }
Example #37
        void _downloader_DownloadCompleted(object sender, Scraper.Events.DownloadCompletedEventArgs e)
        {
            State = DownloadState.Ok;

            if (e.Cancelled)
            {
                CurrentActionText = "Download was cancelled.";
                State = DownloadState.Warning;
            }
            else if (e.Error != null)
            {
                CurrentActionText = "Unable to download/save requested chapter";
                State = DownloadState.Error;
                _log.Error("Unable to download/save requested chapter.", e.Error);
            }

            Completed = true;

            InvokePropertyChanged("ProgressValue");
            InvokePropertyChanged("CurrentActionText");
            InvokePropertyChanged("Completed");
            InvokePropertyChanged("CanOpen");

            OnDownloadCompleted();
        }
Example #38
        static void Main(string[] args)
        {
            OrmLiteConfig.DialectProvider = SqliteDialect.Provider;

            CreateTables();
            GetAndCreateRegions();
            GetAndCreatePollutants();

            foreach (Pollutant pollutant in Globals.PollutantDict.Values)
            {
                Console.WriteLine("\nFetching values for Pollutant: " + pollutant.Name);

                var url = Globals.baseUrl + pollutant.Name;

                var httpClient = new HttpClient();
                var scraper = new Scraper(httpClient, new ScrapedUrisDictionary());

                scraper.Subscribe(new PollutantTableObserver(pollutant.Name));

                scraper.DisableRobotsProtocol = true;
                scraper.Scrape(new Uri(url)).Wait();
            }
        }
Example #39
        public static IEnumerable<Result> Scrape(string url, int ageLimitMinutes, int maxResultCount, int scrapeDelayMs)
        {
            List<Result> results = new List<Result>();

            if (!url.Contains("&SortBy=LastSeen&Order=desc"))
            {
                url = $"{url}&SortBy=LastSeen&Order=desc";
            }

            if (Uri.TryCreate(url, UriKind.Absolute, out Uri searchUriAsc))
            {
                var searchResultsNodesXPath = "/html/body/div[2]/table/tbody/tr[3]/td[2]/section/div/table/tbody";
                int delay = 0;
                if (scrapeDelayMs > 0)
                {
                    int pagesToScrape = maxResultCount / 10;
                    delay = pagesToScrape > 0 ? scrapeDelayMs / pagesToScrape : scrapeDelayMs;
                }
                Thread.Sleep(delay);

                //GetProxies.FromFreeProxyListNet();
                var siteScraper          = new Scraper();
                var docAsc               = siteScraper.TryLoadHtmlDocument(searchUriAsc);
                var searchResultNodesAsc = siteScraper.GetChildNodes(searchResultsNodesXPath);

                if (searchResultNodesAsc == null)
                {
                    string captchaXpath = "/html/body/div[2]/table/tbody/tr[3]/td[2]/section/div/form/div/div";
                    var    captchaNode  = siteScraper.GetNode(captchaXpath);
                    if (captchaNode != null)
                    {
                        if (PromptUserForCaptcha(url))
                        {
                            return Scrape(url, ageLimitMinutes, maxResultCount, scrapeDelayMs);
                        }
                    }
                return results;
                }

                List<HtmlNode> nodesToParse = new List<HtmlNode>();
                nodesToParse.AddRange(from srn in searchResultNodesAsc where srn.GetClasses().Contains("cursor-pointer") select srn);

                if (nodesToParse.Count > maxResultCount)
                {
                    nodesToParse.RemoveRange((nodesToParse.Count / 2) - ((nodesToParse.Count - maxResultCount) / 2), nodesToParse.Count - maxResultCount);
                }

                var fieldsWereInterestedIn = GetFieldSet();
                foreach (var field in fieldsWereInterestedIn) // remove parent node path from field path
                {
                    string updatedPath = field.xPath.Replace(searchResultsNodesXPath, "");
                    field.xPath = updatedPath;
                }
                foreach (var node in nodesToParse)
                {
                    if (node != null)
                    {
                        var parsedNodeFields = Scraper.ParseNodeFields(node, fieldsWereInterestedIn);
                        results.Add(ReadFieldSet(parsedNodeFields));
                    }
                }

                if (results.Count < maxResultCount && results.Count == 10)
                {
                    if (url.Contains("&SortBy=LastSeen&Order=desc"))
                    {
                        if ((from r in results where DateTime.Now.Subtract(r.LastSeen).TotalMinutes >= ageLimitMinutes select r).Count() > 0)
                        {
                            return results;
                        }
                    }
                    // check for more pages of results
                    var paginationNodes = siteScraper.GetChildNodes("/html/body/div[2]/table/tbody/tr[3]/td[2]/section/div/div[3]/ul");
                    foreach (var lineItemNode in paginationNodes) // look at all LIs
                    {
                        if (lineItemNode.HasChildNodes)
                        {
                            foreach (var childNode in lineItemNode.ChildNodes)
                            {
                                if (!childNode.HasClass("disabled") && childNode.InnerText == "&gt;")
                                {
                                    if (childNode.Attributes.Contains("href"))
                                    {
                                        string nextPageUrl = childNode.Attributes["href"].Value.Replace("amp;", "");
                                        results.AddRange(Scrape(nextPageUrl, ageLimitMinutes, maxResultCount - results.Count, scrapeDelayMs - delay));
                                    }
                                }
                            }
                        }
                    }
                }

                return results;
            }
            else
            {
                throw new ArgumentException($"Could not create Uri from {url}", nameof(url));
            }
        }
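A hypothetical call to the helper above; the search URL and the limits are placeholders, not values from the original source:

            var results = Scrape(
                "https://example.com/search?q=widget",
                ageLimitMinutes: 60,
                maxResultCount: 50,
                scrapeDelayMs: 2000);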
Example #40
        static void Main(string[] args)
        {
            bool offline = false;
            for (int i=0; i<args.Length; i++)
            {
                if (string.Compare(args[i], "-help", true) == 0)
                {
                    Console.WriteLine("Options:");
                    Console.WriteLine("\t-clean\tclear local cache");
                    Console.WriteLine("\t-help\tshow this message");
                    Console.WriteLine("\t-o\toffline mode (use cache only)");
                    return;
                }
                if (string.Compare(args[i], "-clean", true) == 0)
                {
                    new Cache().Clean();
                    return;
                }
                if (string.Compare(args[i], "-o", true) == 0)
                    offline = true;
            }

            Cache cache = new Cache();
            IOrderLoader loader = null;

            if (!offline)
            {
                Amz.Auth.CookiesFirefox cf = new Auth.CookiesFirefox(Properties.Settings.Default.BaseDomain);
                if (cf.Count > 0)
                {
                    Console.WriteLine("Trying Firefox login credentials (" + cf.Count + " cookies)...");
                    loader = new Scraper(cf);
                }
                else Console.Error.WriteLine("Could not log in!");
            }
            else loader = cache;

            if (loader != null)
            {
                try
                {
                    var years = loader.LoadOverview(Properties.Settings.Default.StartUrl);
                    double total = 0.0;

                    foreach (var n in years)
                    {
                        Console.WriteLine("Loading " + n + "...");
                        var orders = loader.LoadYear(n, Properties.Settings.Default.HistoryUrlTemplate);
                        if (orders.Count != cache.Store(orders))
                            loader = cache;

                        double year_total = orders.Aggregate(0.0, (acc, o) => acc + o.Sum);
                        Console.WriteLine("\tTotal: " + year_total);
                        total += year_total;

                        #if DEBUG
                            if (n < 2015) break;
                        #endif
                    }

                    Console.WriteLine("Total: " + total);
                }
                catch (Exception exc)
                {
                    Console.Error.WriteLine(exc.Message);
                }
            }

            cache.Dispose();

            #if DEBUG
            Console.WriteLine("Any key to exit.");
            Console.ReadKey();
            #endif
        }
Example #41
        public ActionResult Index()
        {
            IEnumerable<Headline> headlines = Scraper.GetHeadlines();

            return View(headlines);
        }
Example #42
        public void Scrape()
        {
            bool covers;
            bool rescrape;
            var view = new ConfirmScrapeView();
            if (view.ShowDialog() == true)
            {
                covers = view._viewModel.GenerateCovers;
                rescrape = view._viewModel.ReScrape;
            }
            else
            {
                return;
            }

            var scraper = new Scraper();

            ProgressReportingActive = true;

            scraper.BookChanged += MainViewModel.i_BookChanged;
            scraper.Worker.RunWorkerCompleted += _worker_RunWorkerCompleted;

            scraper.ProgressComplete += delegate { ProgressReportingActive = false; };
            scraper.Scrape(SelectedSourceDirectory, MainViewModel.Books.Cast<Book>().ToList(), covers, rescrape);
            Refresh();
            _library.CleanImages();
        }
Example #43
        static void Main(string[] args)
        {
            try
            {
                Console.WriteLine("Please enter which Udemy course URL that you would like to scrape:");
                var udemyCourseURL = Console.ReadLine() ?? string.Empty;


                using (WebClient client = new WebClient())
                {
                    string content = client.DownloadString(udemyCourseURL);

                    /*
                     * string content = client.DownloadString($"http://{craigsListCity.Replace(" ", string.Empty)}.craigslist.org/{Method}/{craigsListCategoryName}");
                     *
                     * ScrapeCriteria scrapeCriteria = new ScrapeCriteriaBuilder()
                     *  .WithData(content)
                     *  .WithRegex(@"<a href=\""(.*?)\"" data-id=\""(.*?)\"" class=\""result-title hdrlnk\"">(.*?)")
                     *  .WithRegexOption(RegexOptions.ExplicitCapture)
                     *  .WithPart(new ScrapeCriteriaPartBuilder()
                     *      .WithRegex(@">(.*?</a>")
                     *      .WithRegexOption(RegexOptions.Singleline)
                     *      .Build())
                     *  .WithPart(new ScrapeCriteriaPartBuilder()
                     *      .WithRegex(@"href=\""(.*?)\""")
                     *      .WithRegexOption(RegexOptions.Singleline)
                     *      .Build())
                     *  .Build();
                     */

                    using (FileStream fileStream = new FileStream("output.html", FileMode.Create))
                    {
                        using (StreamWriter streamWriter = new StreamWriter(fileStream))
                        {
                            streamWriter.Write(content);
                        }
                    }

                    ScrapeCriteria scrapeCriteria = new ScrapeCriteriaBuilder()
                                                    .WithData(content)
                                                    .WithRegex(@"/watch\?v=(.*?)~"
                                                               .Replace('~', '\"'))
                                                    .WithRegexOption(RegexOptions.ExplicitCapture)
                                                    .Build();

                    Scraper scraper = new Scraper();

                    var scrapedElements = scraper.Scrape(scrapeCriteria);

                    if (scrapedElements.Any())
                    {
                        foreach (var scrapedElement in scrapedElements)
                        {
                            Console.WriteLine(scrapedElement);
                        }
                    }
                    else
                    {
                        Console.WriteLine("There were no matches for the specified scrape criteria.");
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
Example #44
 static void Search()
 {
     scraper = new Scraper();
     SearchAndOutput();
 }
Example #45
 private static string EnterHome(ref Scraper.Scraper scraper)
 {
     var lastContent = scraper.PerformRequest(Constants.RunkeeperConstants.URL_HOME, null);
     return lastContent;
 }
Example #46
 static void Search(string searchTerm)
 {
     scraper = new Scraper(searchTerm);
     SearchAndOutput();
 }
Example #47
        static void Main(string[] args)
        {
            try
            {
                // Get city and category from user
                Console.Write("City to scrape information for: ");
                string city = Console.ReadLine() ?? string.Empty;
                Console.Write("CraigsList category: ");
                string category = Console.ReadLine() ?? string.Empty;

                // Use WebClient to pull web page then scrape the listing URL and descriptions
                using (WebClient client = new WebClient())
                {
                    Console.WriteLine($"Scraping page http://{city.Replace(" ", string.Empty)}.craigslist.org/{Method}/{category}");
                    string content = client.DownloadString($"http://{city.Replace(" ", string.Empty)}.craigslist.org/{Method}/{category}");

                    ScrapeCriteria scrapeCriteria = new ScrapeCriteriaBuilder()
                                                    .WithData(content)
                                                    // RegEx for entire listing element
                                                    .WithRegex(@"<a href=\""(.*?)\"" data-id=\""(.*?)\"" class=\""result-title hdrlnk\"">(.*?)</a>")
                                                    .WithRegexOption(RegexOptions.ExplicitCapture)
                                                    // Build scraper for listing description part
                                                    .WithPart(new ScrapeCriteriaPartBuilder()
                                                              .WithRegex(@">(.*?)</a>")
                                                              .WithRegexOption(RegexOptions.Singleline)
                                                              .Build())
                                                    // Build scraper for listing URL part
                                                    .WithPart(new ScrapeCriteriaPartBuilder()
                                                              .WithRegex(@"href=\""(.*?)\""")
                                                              .WithRegexOption(RegexOptions.Singleline)
                                                              .Build())
                                                    .Build();

                    // Call scraper to extract listing elements from page then extract parts from listing elements
                    Scraper scraper         = new Scraper();
                    var     scrapedElements = scraper.Scrape(scrapeCriteria);

                    // Display scraped parts if any exists
                    if (scrapedElements.Any())
                    {
                        foreach (var scrapedElement in scrapedElements)
                        {
                            Console.WriteLine(scrapedElement);
                        }
                    }
                    else
                    {
                        Console.WriteLine("There were no matches for the entered city and category.");
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            finally
            {
                Console.Write("Press any key to exit.");
                Console.ReadLine();
            }
        }
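Scraper.Scrape itself is not shown in these examples; the builder calls above imply that it runs the main regex across the page and then applies each part regex to every match. A minimal sketch under that assumption (the property names on ScrapeCriteria mirror the builder methods and are guesses):

        // Assumed mechanics of Scraper.Scrape, inferred from the builder usage above.
        public IEnumerable<string> Scrape(ScrapeCriteria criteria)
        {
            var scraped = new List<string>();

            foreach (Match match in Regex.Matches(criteria.Data, criteria.Regex, criteria.RegexOption))
            {
                foreach (var part in criteria.Parts)
                {
                    var partMatch = Regex.Match(match.Value, part.Regex, part.RegexOption);

                    if (partMatch.Success)
                    {
                        scraped.Add(partMatch.Groups[1].Value);
                    }
                }
            }

            return scraped;
        }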
Example #48
 static void Search(string searchTerm, int pageFrom, int pageTo)
 {
     scraper = new Scraper(searchTerm, pageFrom, pageTo);
     SearchAndOutput();
 }
Example #49
 void Scrape()
 {
     Scraper = _auctionWebScraperFactory.CreateAuctionWebScraper();
     Scraper.StartAsync();
 }
Example #50
 static void Main(string[] args)
 {
     var scraper = new Scraper();
     scraper.Run();
 }