static bool ValidationForCheckCurrencyAlerts()
{
    var results = WebScraper.GetCurrencyPairs(webSite);
    var alerts = Config.Get().alerts;
    bool isValid = true;

    foreach (var item in alerts)
    {
        // One unknown pair name invalidates the configuration.
        if (!results.Result.Exists(m => m.Name == item.name))
        {
            isValid = false;
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine("Invalid price alert in appsettings.json!");
        }
    }

    if (isValid)
    {
        Console.ForegroundColor = ConsoleColor.Green;
        Console.WriteLine("Valid price alerts were found in appsettings.json!");
    }

    Console.ForegroundColor = ConsoleColor.White;
    return isValid;
}
static void CheckAlertCriterion(object sender, ElapsedEventArgs e)
{
    var alerts = Config.Get().alerts;
    var webScrapeCurrencyPairs = WebScraper.GetCurrencyPairs(webSite).Result.AsEnumerable();

    var alertCurrencyPairs =
        from currency in webScrapeCurrencyPairs
        from alert in alerts
        where currency.Name == alert.name &&
              ((currency.BidPrice < alert.price && alert.direction == "-") ||
               (currency.BidPrice > alert.price && alert.direction == "+"))
        select currency;

    if (!alertCurrencyPairs.IsNullOrEmpty())
    {
        string message = "";
        foreach (var item in alertCurrencyPairs)
        {
            message = item.Name + " hit alert value at " + item.BidPrice + "\n" + message;
        }

        EmailSender emailSender = new EmailSender(message);
        emailSender.Send();

        Console.ForegroundColor = ConsoleColor.Green;
        Console.WriteLine("Email was sent at {0:HH:mm:ss.fff}\n{1}\n", DateTime.Now, message);
        Console.ForegroundColor = ConsoleColor.White;
    }
}
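// Both handlers above read alert definitions from Config.Get().alerts, whose shape
// is not shown in this section. A minimal sketch of such an alert record, assuming
// name, price and direction are bound from appsettings.json (the property names
// match the usage above; everything else is hypothetical):
public class PriceAlert
{
    public string name { get; set; }      // currency pair name, e.g. "EURUSD" (assumed format)
    public decimal price { get; set; }    // threshold the bid price is compared against
    public string direction { get; set; } // "-" fires below the threshold, "+" above it
}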
/// <summary>
/// Asynchronously returns a d3 tree node headlines object for the given url, site name and headline selector.
/// Fetches the html of the webpage asynchronously and extracts the news headline text values
/// using the provided selector (uses the Html Agility Pack nuget package). The list of headlines is
/// cleaned and limited to the set headline count value. A D3TreeNode is then constructed for the
/// headlines and returned.
/// On error, returns an empty D3TreeNode object.
/// </summary>
/// <param name="siteName"></param>
/// <param name="url"></param>
/// <param name="headlineSelector"></param>
/// <returns>Task&lt;D3TreeNode&gt;</returns>
private async Task<D3TreeNode> ConstructHeadlineData(string siteName, string url, string headlineSelector)
{
    try
    {
        /* Asynchronously read webpage and extract news headlines */
        //TODO: Refactor methods to form a piped chain of functions
        var doc = await WebScraper.GetHtmlDocument(url);
        var rawList = WebScraper.GetRawHeadlines(doc, headlineSelector);
        var cleanedList = WebScraper.CleanHeadlines(rawList);
        var list = WebScraper.GetLimitedHeadlines(cleanedList, 12);

        /* Construct D3 tree data from news headline list */
        var headlineNodes = D3TreeNodeMaker.GetNodesFromStringList(list);
        var node = D3TreeNodeMaker.ConstructNode(siteName, "null", headlineNodes);
        return node;
    }
    catch (Exception)
    {
        //TODO: Propagate error to caller
        return D3TreeNode.Empty();
    }
}
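// D3TreeNode itself is not defined in this section. A minimal sketch of the shape a
// d3.js hierarchy layout typically consumes, guessed from how ConstructNode and
// Empty are called above; all member names here are assumptions:
using System.Collections.Generic;

public class D3TreeNode
{
    public string Name { get; set; }
    public string Parent { get; set; }    // "null" for the root, per the call above
    public List<D3TreeNode> Children { get; set; } = new List<D3TreeNode>();

    public static D3TreeNode Empty() => new D3TreeNode { Name = "", Parent = "null" };
}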
static void Main(string[] args)
{
    var entry = WebScraper.GetWordMeaning("gringo");
    //var test = WebScraper.GetAllWordsFromDRAE();
    //var test2 = WebScraper.GetAllWordsFromDRAE("CH");
    //var test3 = WebScraper.GetAllWordsFromDRAE('s').Where(x => x.Contains("sali")).Select(w => WebScraper.GetWordMeaning(w)).ToList();
}
private void SearchLocations()
{
    var page = DownloadPageString("https://www.craigslist.org/about/sites");
    var scraper = new WebScraper();
    var locs = new List<string>();

    _locations = scraper.ScrapeLocations(page);
    if (_locations.Count > 0)
    {
        locationComboBox.Invoke(new Action(() => locationComboBox.Items.Clear()));
    }

    foreach (var location in _locations.Keys)
    {
        if (locs.Contains(location.Trim()))
        {
            continue;
        }
        // Marshal UI updates back onto the UI thread.
        locationComboBox.Invoke(new Action(() => locationComboBox.Items.Add(location.Trim())));
        locs.Add(location.Trim());
    }

    locationComboBox.Invoke(new Action(() => locationComboBox.SelectedIndex = 0));
    statusLabel.Invoke(new Action(() => statusLabel.Text = @"Finished"));
    //startButton.Enabled = stopButton.Enabled = clearButton.Enabled = exportButton.Enabled = true;
}
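// DownloadPageString is called above but not defined in this section. A minimal
// sketch of such a helper using HttpClient; the synchronous wrapper only mirrors
// the call site above, and the whole implementation is an assumption:
using System.Net.Http;

private static readonly HttpClient _httpClient = new HttpClient();

private string DownloadPageString(string url)
{
    return _httpClient.GetStringAsync(url).GetAwaiter().GetResult();
}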
public void TearDown()
{
    _matchBettingDataList = null;
    _predictionComparisons = null;
    _fakeWebscraper = null;
    _uriHCapCouponFake = null;
}
public void getJSON_NonDRPage()
{
    RequestAttribute attrs = new RequestAttribute
    {
        URI = "https://api.dealerrater.com/",
        URL = "https://www.google.com/",
        API = false,
        AccessToken = "ACCESSTOKENHERE",
        DealerID = "23685",
        PaginationOptions = new PaginationOptions { Pages = 1, ResultsPerPage = 10, Offset = 0 }
    };

    WebScraper scraper = new WebScraper(attrs);
    string returnedJSON = scraper.getJSON();
    ReviewCollection collection = JsonConvert.DeserializeObject<ReviewCollection>(returnedJSON);

    Assert.AreEqual(0, collection.reviews.Count);
}
public void ScrapingHtmlDocument_ReturnsDocWithTargetUrl()
{
    var uri = new Uri("http://www.google.com");
    HtmlDocument doc = new WebScraper().GetDocument(new ScrapeRequest(uri));
    Assert.That(doc.Location, Is.EqualTo(uri));
}
public void getJSON_StressTest()
{
    RequestAttribute attrs = new RequestAttribute
    {
        URI = "https://api.dealerrater.com/",
        URL = "https://www.dealerrater.com/dealer/McKaig-Chevrolet-Buick-A-Dealer-For-The-People-dealer-reviews-23685",
        API = false,
        AccessToken = "ACCESSTOKENHERE",
        DealerID = "23685",
        PaginationOptions = new PaginationOptions { Pages = 50, ResultsPerPage = 10, Offset = 0 }
    };

    WebScraper scraper = new WebScraper(attrs);
    string returnedJSON = scraper.getJSON();
    ReviewCollection collection = JsonConvert.DeserializeObject<ReviewCollection>(returnedJSON);

    Assert.AreEqual(attrs.PaginationOptions.Pages * attrs.PaginationOptions.ResultsPerPage, collection.reviews.Count);
}
public void getHTML_MultiPage()
{
    RequestAttribute attrs = new RequestAttribute
    {
        URI = "https://api.dealerrater.com/",
        URL = "https://www.dealerrater.com/dealer/McKaig-Chevrolet-Buick-A-Dealer-For-The-People-dealer-reviews-23685",
        API = false,
        AccessToken = "ACCESSTOKENHERE",
        DealerID = "23685",
        PaginationOptions = new PaginationOptions { Pages = 5, ResultsPerPage = 10, Offset = 0 }
    };

    WebScraper scraper = new WebScraper(attrs);
    string returnedHTML = scraper.getHTML();

    // Count the concatenated pages by counting doctype declarations.
    const string marker = "<!doctype html>";
    int index, count = 0;
    while ((index = returnedHTML.ToLower().IndexOf(marker)) != -1)
    {
        returnedHTML = returnedHTML.Substring(index + marker.Length);
        count++;
    }

    Assert.AreEqual(attrs.PaginationOptions.Pages, count);
}
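// The substring loop above rescans and reallocates the remaining string on every
// hit. Assuming the same case-insensitive doctype marker, the count can be taken
// in a single call:
using System.Text.RegularExpressions;

int count = Regex.Matches(returnedHTML, Regex.Escape("<!doctype html>"),
                          RegexOptions.IgnoreCase).Count;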
private void RunButton_Click(object sender, RoutedEventArgs e)
{
    this.Cursor = Cursors.Wait;

    var xmlScripts = ScriptInput.Text;
    var scripts = Script.DeserializeScripts(xmlScripts);
    this.ScriptText.Text = scripts.First().Documentation;

    var engine = new WebScraper();
    var target = engine.Run(UrlInput.Text, xmlScripts);
    this.Text.Text = target.First().Value;

    if (SourceCheckBox.IsChecked.Value)
    {
        try
        {
            var html = WebScraper.FetchHtml(UrlInput.Text);
            this.SourceBrowser.NavigateToString(html);
        }
        catch (System.Exception ex)
        {
            this.SourceBrowser.NavigateToString(ex.Message);
        }
    }

    this.Cursor = Cursors.Arrow;
}
public static void Main(string[] args)
{
    var scraper = new WebScraper();
    scraper.StartScraping();
    CreateWebHostBuilder(args).Build().Run();
}
public List<Player> Scrape(int year, int week)
{
    WebScraper scraper = new WebScraper(null, null, null);
    JObject json = scraper.ScrapeJson(String.Format(
        "http://api.fantasy.nfl.com/v1/players/stats?statType=weekStats&season={0}&week={1}&format=json",
        year, week));

    List<Player> statistics = new List<Player>();
    var players = from player in json["players"] select player;

    //loop through players in the JSON response
    foreach (JObject jPlayer in players)
    {
        //create a new player record
        Player player = new Player();

        //set basic player values
        player.Name = jPlayer["name"].ToString();
        player.Team = jPlayer["teamAbbr"].ToString();
        player.Position = jPlayer["position"].ToString().ToUpper();
        player.GamesPlayed = 1;

        if (player.Position == "QB")
        {
            player.PassingYards = decimal.Parse(IsNullStat(jPlayer["stats"][PassingYards]));
            player.PassingTouchdowns = decimal.Parse(IsNullStat(jPlayer["stats"][PassingTouchdowns]));
            player.Interceptions = decimal.Parse(IsNullStat(jPlayer["stats"][Interceptions]));
            player.RushingYards = decimal.Parse(IsNullStat(jPlayer["stats"][RushingYards]));
            player.RushingTouchdowns = decimal.Parse(IsNullStat(jPlayer["stats"][RushingTouchdowns]));
            statistics.Add(player);
        }

        //RB and WR use identical stat categories, so the two branches are merged
        if (player.Position == "RB" || player.Position == "WR")
        {
            player.RushingYards = decimal.Parse(IsNullStat(jPlayer["stats"][RushingYards]));
            player.RushingTouchdowns = decimal.Parse(IsNullStat(jPlayer["stats"][RushingTouchdowns]));
            player.Receptions = decimal.Parse(IsNullStat(jPlayer["stats"][Receptions]));
            player.ReceivingYards = decimal.Parse(IsNullStat(jPlayer["stats"][ReceivingYards]));
            player.ReceivingTouchdowns = decimal.Parse(IsNullStat(jPlayer["stats"][ReceivingTouchdowns]));
            statistics.Add(player);
        }

        if (player.Position == "TE")
        {
            player.Receptions = decimal.Parse(IsNullStat(jPlayer["stats"][Receptions]));
            player.ReceivingYards = decimal.Parse(IsNullStat(jPlayer["stats"][ReceivingYards]));
            player.ReceivingTouchdowns = decimal.Parse(IsNullStat(jPlayer["stats"][ReceivingTouchdowns]));
            statistics.Add(player);
        }
    }

    return statistics;
}
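// IsNullStat is used throughout but not defined in this section. Presumably it
// turns a stat the player did not record (absent from the JSON) into a parseable
// zero; a minimal sketch under that assumption:
using Newtonsoft.Json.Linq;

private static string IsNullStat(JToken stat)
{
    return stat == null ? "0" : stat.ToString();
}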
private void GetSeasonTeProjections(ref Projections projections)
{
    WebScraper scraper = new WebScraper(null, null, null);
    HtmlDocument document = scraper.Scrape("https://www.fantasypros.com/nfl/projections/te.php?week=draft");
    //HtmlDocument document = scraper.Scrape("https://web.archive.org/web/20150908002135/http://www.fantasypros.com/nfl/projections/te.php?week=draft");

    //get projection-data table from html
    HtmlNode table = document.GetElementbyId(FantasyProsProjectionTable)
        .Descendants()
        .Where(t => t.Name == "tbody")
        .FirstOrDefault<HtmlNode>();

    //loop through rows in projection table
    foreach (HtmlNode row in table.SelectNodes("./tr"))
    {
        //create new datarow
        Player player = new Player();

        //parse name and team out of player cell
        FantasyProsParser parser = new FantasyProsParser(row.SelectSingleNode("./td[1]"));

        //convert to nfl values
        NflConverter converter = new NflConverter(parser.Name, parser.Team);

        //set row values (season totals divided by a 16-game season to get per-game numbers)
        player.Id = projections.SeasonProjectionPlayers.Count + 1;
        player.Name = converter.Name;
        player.Position = "TE";
        player.NflTeam = converter.NflTeam;
        player.Receptions = decimal.Parse(row.SelectSingleNode("./td[2]").InnerText) / 16;
        player.ReceivingYards = decimal.Parse(row.SelectSingleNode("./td[3]").InnerText) / 16;
        player.ReceivingTouchdowns = decimal.Parse(row.SelectSingleNode("./td[4]").InnerText) / 16;

        //add datarow to datatable
        projections.SeasonProjectionPlayers.Add(player);
    }
}
private static void OnTimedEvent(object source, ElapsedEventArgs e)
{
    // Run the scrape on a background thread so it does not block the timer callback.
    new Thread(delegate () { WebScraper.Webscrape(); }).Start();
}
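// The ElapsedEventArgs signature implies the handler above is attached to a
// System.Timers.Timer; the wiring is not shown in this section. A minimal sketch,
// with the interval as a placeholder:
using System.Timers;

var timer = new Timer(60_000); // hypothetical interval: scrape once a minute
timer.Elapsed += OnTimedEvent;
timer.AutoReset = true;
timer.Start();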
/// <summary>
/// Toggles the autostart flag for the scraper selected in the list view.
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void miScraperAutostart_Click(object sender, RoutedEventArgs e)
{
    try
    {
        WebScraper scraper = (WebScraper)lvScrapers.SelectedItem;
        if (scraper != null)
        {
            bool auto = !_autostart.Contains(scraper.ScraperId);
            if (Proxy.SetAutoStart(scraper.ScraperId, auto))
            {
                if (auto)
                {
                    _autostart.Add(scraper.ScraperId);
                }
                else
                {
                    _autostart.Remove(scraper.ScraperId);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.Error("Error toggling scraper autostart", ex);
    }
}
public MainWindowViewModel(Window mainWindow)
{
    _webScraper = new WebScraper();
    _albumDownloader = new AlbumDownloader(true);
    _mainWindow = mainWindow;

    _destinationInput = _mainWindow.Find<TextBox>("DestinationInput");
    _albumTitle = _mainWindow.Find<Label>("AlbumTitle");
    _downloadLog = _mainWindow.Find<TextBlock>("DownloadLog");
    _urlInput = _mainWindow.Find<TextBox>("UrlInput");
    _titleBar = _mainWindow.Find<DockPanel>("TitleBar");
    _applicationTitle = _mainWindow.Find<Label>("ApplicationTitle");
    _downloadProgess = _mainWindow.Find<Label>("DownloadProgress");

    ExitCommand = ReactiveCommand.Create(Exit);
    MinimizeCommand = ReactiveCommand.Create(Minimize);
    OpenFolderCommand = ReactiveCommand.Create(OpenFolder);
    ReleasesCommand = ReactiveCommand.Create(OpenReleases);
    IssuesCommand = ReactiveCommand.Create(OpenIssues);
    DownloadCommand = ReactiveCommand.Create(Download);

    _albumDownloader.FileDownloaded += AlbumDownloader_FileDownloaded;
    _albumDownloader.FileDownloading += AlbumDownloader_FileDownloading;
    _albumDownloader.FileExists += AlbumDownloader_FileExists;
    _albumDownloader.FileFailed += AlbumDownloader_FileFailed;
    _albumDownloader.ProgressChanged += AlbumDownloader_ProgressChanged;
    _titleBar.PointerPressed += TitleBar_PointerPressed;
    _destinationInput.PointerReleased += DestinationInput_PointerReleased;

    _applicationTitle.Content = $"cy client - v{Assembly.GetExecutingAssembly().GetName().Version}";
    _destinationInput.Text = Environment.CurrentDirectory;
    _cancellationTokenSource = new CancellationTokenSource();
}
public Discogs(string artist, string track)
{
    Debug.WriteLine("Discogs looking for artist: " + artist + " with track: " + track);
    args_url[1] += artist;
    args_url[2] += track;
    scraper = new WebScraper(SetupURL);

    // NOTE: 'found' is true when Discogs reports *no* match; the inverted naming is kept from the original.
    found = scraper.Contains("We couldn't find anything in the Discogs database matching your search criteria.");
    if (found)
    {
        Debug.WriteLine("\tDiscogs could not find that.");
    }
    else
    {
        string link = base_url + scraper.GetHtmlDocument().DocumentNode
            .Descendants("a")
            .Where(a => a.HasClass("thumbnail_link"))
            .Select(a => a.GetAttributeValue("href", null))
            .FirstOrDefault();
        Debug.WriteLine("\tDiscogs found first track at: " + link);

        scraper = new WebScraper(link);
        string hash = GetHashCode().ToString();
        string initial_filepath = Downloader._albumart_directory + "/" + hash;
        string image_url = new Uri(scraper.GetHtmlDocument().DocumentNode
            .Descendants("img")
            .Where(i => i.ParentNode.HasClass("thumbnail_center"))
            .Select(e => e.GetAttributeValue("src", null))
            .Where(s => !String.IsNullOrEmpty(s))
            .FirstOrDefault()).ToString();
        metadata.artfile = WebScraper.DownloadImage(initial_filepath, image_url);
        Debug.WriteLine("\tFound album art, stored to: " + metadata.artfile.FullName);

        // TODO: fix these scrapers; they crash whenever the relevant info is missing from the page.
        //metadata.label = scraper.GetHtmlDocument().DocumentNode.Descendants("a").Where(a => a.ParentNode.ParentNode.InnerHtml.Contains("Label:")).FirstOrDefault().InnerHtml;
        //Debug.WriteLine("\tFound label: " + metadata.label);
        //metadata.artist = scraper.GetHtmlDocument().DocumentNode.Descendants("a").Where(a => a.ParentNode.ParentNode.ParentNode.Id.Equals("profile_title")).FirstOrDefault().InnerHtml.Split('(')[0];
        //Debug.WriteLine("\tFound artist: " + metadata.artist);
        //string genre = scraper.GetHtmlDocument().DocumentNode.Descendants("a").Where(a => a.GetAttributes("href").ToList().Where(attr => attr.Value.Contains("/genre/")).Count() > 0).FirstOrDefault().InnerHtml;
        //string style = scraper.GetHtmlDocument().DocumentNode.Descendants("a").Where(a => a.GetAttributes("href").ToList().Where(attr => attr.Value.Contains("/style/")).Count() > 0).FirstOrDefault().InnerHtml;
        //metadata.genre = genre + " - " + style;
        //Debug.WriteLine("\tFound genre: " + metadata.genre);
        //try
        //{
        //    metadata.releasedate = DateTime.Parse(scraper.GetHtmlDocument().DocumentNode.Descendants("a").Where(a => a.GetAttributeValue("href", null).Contains("/search/?decade=")).FirstOrDefault().GetAttributeValue("href", null).Split('=').LastOrDefault().ToString());
        //    Debug.WriteLine("\tFound releasedate: " + metadata.releasedate.ToString());
        //}
        //catch (Exception)
        //{
        //    try
        //    {
        //        metadata.releasedate = DateTime.Parse(scraper.GetHtmlDocument().DocumentNode.Descendants("a").Where(a => a.GetAttributeValue("href", null).Contains("/search/?decade=")).FirstOrDefault().InnerHtml);
        //        Debug.WriteLine("\tFound year: " + metadata.releasedate.ToString());
        //    }
        //    catch (Exception)
        //    {
        //        Debug.WriteLine("\tDid not find any release date or year!");
        //    }
        //}
        //List<string> titles = new List<string>(scraper.GetHtmlDocument().DocumentNode.Descendants("span").Where(a => a.HasClass("tracklist_track_title") && a.InnerHtml.Length > 0).Select(s => s.InnerHtml));
        //Debug.WriteLine("\tFound " + titles.Count + " titles.");
        //foreach (string t in titles)
        //    Debug.WriteLine("\t\t" + t);
        //metadata.title = titles.Max<object>(t => StringsSimilarity(track, t.ToString())).ToString();
    }
}
public async Task Neko()
{
    string nekoImage = WebScraper.GetNeko();
    var info = await Context.Client.GetApplicationInfoAsync();
    var role = CommandSource.GetMainRole(info.Id, Context);
    await CommandSource.SendImage(Context, role, nekoImage, "🌸 uwu");
}
public HttpResponseMessage Get()
{
    WebScraper scraper = new WebScraper();
    ItemResponse<string> resp = new ItemResponse<string>();
    resp.Item = scraper.GetContent("og:description");
    return Request.CreateResponse(System.Net.HttpStatusCode.OK, resp);
}
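// GetContent is not shown here. Assuming it pulls an Open Graph meta tag out of a
// fetched page, a minimal sketch with the Html Agility Pack (used elsewhere in this
// section); the _targetUrl field and the loading strategy are both assumptions:
using HtmlAgilityPack;

public string GetContent(string property)
{
    var web = new HtmlWeb();
    HtmlDocument doc = web.Load(_targetUrl); // _targetUrl: hypothetical field
    var meta = doc.DocumentNode.SelectSingleNode($"//meta[@property='{property}']");
    return meta?.GetAttributeValue("content", null);
}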
public async Task AnalyzeArticleImages(IEnumerable<Article> articles)
{
    AzureComputerVision cv = new AzureComputerVision();

    // Currently analyzes a single hardcoded test image rather than iterating over 'articles'.
    byte[] imageData = await WebScraper.GetImageFromUrlAsync("https://static.politico.com/36/0a/aff08ea841e5aa76edddb4bc50d7/190909-donald-trump-gty-773.jpg");
    //if (imageData == null) continue;

    await cv.Analyze(imageData);
}
public async Task Cat()
{
    var info = await Context.Client.GetApplicationInfoAsync();
    var role = CommandSource.GetMainRole(info.Id, Context);
    string json = WebScraper.GetCat();
    string img = CommandSource.ParseJson(json, "url");
    await CommandSource.SendImage(Context, role, img, "🐱 Meoww-");
}
public void GetTimesheet_GetProjectCodes_4ProjectCodes()
{
    var timesheetParser = new HtmlParser();
    var scraper = new WebScraper(timesheetParser, _username, _password);
    var timesheet = scraper.LoginAndGetTimesheet();

    Assert.IsNotNull(timesheet);
    Assert.AreEqual(37.5, timesheet.TotalRequiredHours.TotalHours);
    TestHelper.PrettyPrintTimesheet(timesheet);
}
static void Main(string[] args)
{
    // WebScraper is IDisposable; a using block replaces the explicit Dispose() call.
    using (var scraper = new WebScraper(false) { Url = "http://www.weatherzone.com.au/" })
    {
    }
}
public async Task Test01()
{
    // arrange

    // act
    bool result = await WebScraper.GetAndSaveGoogleImageOfTheDay();

    // assert
    Assert.IsTrue(result);
}
static void Main(string[] args)
{
    string outputPath = ConfigurationManager.AppSettings.Get("OutputPath");

    Console.WriteLine("Enter the AliExpress URL you want to scrape data from:");
    var url = Console.ReadLine();
    var scraper = new WebScraper(url);

    int num_pages = 0;
    do
    {
        Console.WriteLine("How many pages do you want to scrape?");
        try
        {
            num_pages = Int32.Parse(Console.ReadLine());
        }
        catch (Exception)
        {
            Console.WriteLine("Invalid input value. Try to use a valid number.");
        }
    } while (num_pages == 0);

    var parser = new AliExpressParser(scraper.LoadPage());
    int page_counter = 1;
    do
    {
        parser.ParseDocument();
        var outputFile = Path.Combine(outputPath, $"{parser.Category.ToLower().Replace(" ", String.Empty)}_page_{page_counter}.json");
        if (File.Exists(outputFile))
        {
            File.Delete(outputFile);
        }

        using (var stream = new FileStream(outputFile, FileMode.CreateNew))
        {
            Console.WriteLine($"Saving page {page_counter} into {outputFile}");
            parser.WriteOutput(stream);
        }

        // Stop when there is no next page; otherwise the loop would re-scrape the
        // same document forever, since page_counter would never advance.
        if (String.IsNullOrEmpty(parser.NextPage))
        {
            break;
        }

        scraper = new WebScraper(parser.NextPage);
        parser = new AliExpressParser(scraper.LoadPage());
        page_counter++;
    } while (page_counter <= num_pages);
}
public YtCollector(YtStore store, AppCfg cfg, SnowflakeConnectionProvider sf, IPipeCtx pipeCtx,
    WebScraper webScraper, ChromeScraper chromeScraper, YtClient api)
{
    Store = store;
    Cfg = cfg;
    Sf = sf;
    PipeCtx = pipeCtx;
    Scraper = webScraper;
    ChromeScraper = chromeScraper;
    Api = api;
}
public void ImportRules_ExportRules_DeleteRule_ImportLinks_SaveToJson_Test()
{
    var consoleMock = new Mock<MyConsole>();
    consoleMock.SetupSequence(c => c.ReadLine())
        .Returns("3")                          //Option "Rule management"
        .Returns("2")                          //Option "Import rule"
        .Returns("1")                          //Option "Import from json"
        .Returns(files["Rules"])               //File path to load rules
        .Returns("3")                          //Option "Rule management"
        .Returns("3")                          //Option "Export rules"
        .Returns("1")                          //Option "Export to json"
        .Returns(files["Rules"])               //File path to save rules
        .Returns("3")                          //Option "Rule management"
        .Returns("7")                          //Option "Delete rule"
        .Returns("Title")                      //Name of the rule to delete
        .Returns("2")                          //Option "Parse page"
        .Returns("1")                          //Option "Import link from file"
        .Returns(files["Links"])               //File path to load links
        .Returns("3")                          //Option "Export to json"
        .Returns(files["Scraped values json"]) //File path to save results
        .Returns("4");                         //Option "Exit"

    var requestServiceMock = new Mock<RequestService>();
    requestServiceMock.Setup(x => x.SendRequest("http://example.com"))
        .Returns("<html><title>Example Domain</title></html>");

    var rulesList = new List<ParsingRule>
    {
        new ParsingRule("t", "t", "Test"),
        new ParsingRule("t2", "t2", "Title")
    };
    var urlsList = new List<string> { "http://example.com" };

    var fileServiceMock = new Mock<FileService>();
    fileServiceMock.Setup(x => x.ExportToJson(It.IsAny<List<Dictionary<string, string>>>(), files["Scraped values json"]))
        .Returns("Successfully saved!");
    fileServiceMock.Setup(x => x.ExportToJson(It.IsAny<ParsingRule[]>(), files["Rules"]))
        .Returns("Successfully saved!");
    fileServiceMock.Setup(x => x.ImportFromJson<ParsingRule[]>(files["Rules"]))
        .Returns(rulesList.ToArray());
    fileServiceMock.Setup(x => x.ImportFromJson<string[]>(files["Links"]))
        .Returns(urlsList.ToArray());

    WebScraper webScraper = new WebScraper(consoleMock.Object, requestServiceMock.Object, fileServiceMock.Object);
    Assert.AreEqual(0, webScraper.Start());
}
public void Test_WebScraper_SystemBusy()
{
    string id = "1";
    string url = "/tvshow/30-rock/6";
    WebScraper _metacriticScraperZeroCapacity = new WebScraper(null, 0);

    Assert.That(() => _metacriticScraperZeroCapacity.AddItem(id, url),
        Throws.Exception.TypeOf<SystemBusyException>()
            .With.Property("Message")
            .EqualTo("Too many requests at the moment"));
}
public void UpdateScraper(WebScraper newScraper)
{
    ScraperId = newScraper.ScraperId;
    ScraperName = newScraper.ScraperName;
    ScraperDescription = newScraper.ScraperDescription;

    if (this.ScraperInfo == null || !this.ScraperInfo.Equals(newScraper.ScraperInfo))
    {
        this.ScraperInfo = newScraper.ScraperInfo;
        NotifyPropertyChanged("ScraperInfo");
    }
}
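// NotifyPropertyChanged implies the class implements INotifyPropertyChanged; that
// plumbing is not shown in this section. A minimal sketch of the assumed pattern:
using System.ComponentModel;

public event PropertyChangedEventHandler PropertyChanged;

private void NotifyPropertyChanged(string propertyName)
{
    PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
}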