//insert scraped flight prices into the database
public void WritePrices()
{
    FlightsSql flightsSql = new FlightsSql(connectionString);
    List<FlightUrl> destinations = flightsSql.LoadDestinationsToScrape(connectionString);
    //Flight object for flight methods
    Flight flight = new Flight();
    List<decimal> prices = flight.ScrapePrice(DateAndTime.CurrentDateTime(), connectionString);
    try
    {
        using (SqlConnection conn = new SqlConnection(connectionString))
        {
            conn.Open();
            //prices and destinations are assumed to line up one-to-one by index
            for (int i = 0; i < prices.Count; i++)
            {
                using (SqlCommand cmd = new SqlCommand(SQL_WriteFlightInfo, conn))
                {
                    cmd.Parameters.AddWithValue("@price", prices[i]);
                    cmd.Parameters.AddWithValue("@originCode", "LAX");
                    cmd.Parameters.AddWithValue("@departureDate", destinations[i].Depart);
                    cmd.Parameters.AddWithValue("@returnDate", destinations[i].Return);
                    cmd.Parameters.AddWithValue("@destinationCode", destinations[i].DestinationAirportCode);
                    cmd.Parameters.AddWithValue("@logTime", DateAndTime.CurrentDateTime());
                    int worked = cmd.ExecuteNonQuery();
                    if (worked > 0)
                    {
                        Console.WriteLine($"Inserted {worked} row");
                    }
                }
            }
        }
    }
    catch (SqlException e)
    {
        Email.SendEmailFailure(e.ToString(), "Failed during: public void WritePrices()");
        throw;
    }
}
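//SQL_WriteFlightInfo is a query constant defined elsewhere in this class. Based on the
//parameters bound above, it is presumably a parameterized INSERT along these lines
//(a hedged sketch; the actual table and column names may differ):
//
//  private const string SQL_WriteFlightInfo =
//      @"INSERT INTO FlightPrices
//            (Price, OriginCode, DepartureDate, ReturnDate, DestinationCode, LogTime)
//        VALUES
//            (@price, @originCode, @departureDate, @returnDate, @destinationCode, @logTime);";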
//scrape prices from SkyScanner (previous version, kept for reference)
//public List<decimal> ScrapePrice()
//{
//    FlightsSql flightsSql = new FlightsSql(connectionString);
//    List<FlightUrl> destinations = flightsSql.LoadDestinationsToScrape();
//    using (WebBrowser wb = new WebBrowser())
//    {
//        string text = "";
//        decimal price = 0;
//        bool isWorking = false;
//        int count = 0;
//        int tryCounter = 0;
//        List<decimal> prices = new List<decimal>();
//        foreach (var url in destinations)
//        {
//            isWorking = false;
//            Console.WriteLine($"Trying {url.DestinationAirportCode}...");
//            while (!isWorking)
//            {
//                wb.ScriptErrorsSuppressed = true;
//                wb.Navigate($"{url}");
//                while (wb.ReadyState != WebBrowserReadyState.Complete || tryCounter != 5)
//                {
//                    Application.DoEvents();
//                    Thread.Sleep(10000);
//                    Application.DoEvents();
//                    tryCounter++;
//                }
//                if (wb.Document.GetElementsByTagName("tbody").Count > 0 || tryCounter == 5)
//                {
//                    isWorking = true;
//                }
//            }
//            if (tryCounter == 5)
//            {
//                count++;
//                Console.WriteLine($"Number {count} of {destinations.Count} failed.");
//                tryCounter = 0;
//            }
//            else
//            {
//                text = wb.Document.GetElementsByTagName("tbody")[0].InnerText.Substring(5, 7);
//                if (text.Contains(" "))
//                {
//                    int limit = text.IndexOf(" ");
//                    text = wb.Document.GetElementsByTagName("tbody")[0].InnerText.Substring(5, limit);
//                    price = Decimal.Parse(text);
//                }
//                prices.Add(price);
//                count++;
//                Console.WriteLine($"{count}/{destinations.Count} Done. Price = ${price}");
//            }
//        }
//        return prices;
//    }
//}

//scrape prices from SkyScanner
//note: WebBrowser must run on an STA thread with a pumped message loop,
//which is why the navigation wait calls Application.DoEvents()
public List<decimal> ScrapePrice(DateTime now, string connectionString)
{
    FlightsSql flightsSql = new FlightsSql(connectionString);
    List<FlightUrl> destinations = flightsSql.LoadDestinationsToScrape(connectionString);
    int tryCount = 0;
    string text = "";
    decimal price = 0;
    bool isWorking = false;
    int count = 0;
    int failureCount = 0;
    List<decimal> prices = new List<decimal>();
    //50 destinations are scraped per run; the same figure feeds the
    //percentage calculations below
    for (int i = 0; i < 50; i++)
    {
        using (WebBrowser wb = new WebBrowser())
        {
            isWorking = false;
            Console.WriteLine($"Trying {destinations[i].DestinationAirportCode}...");
            while (!isWorking)
            {
                wb.ScriptErrorsSuppressed = true;
                //relies on FlightUrl.ToString() producing the SkyScanner URL
                wb.Navigate($"{destinations[i]}");
                while (wb.ReadyState != WebBrowserReadyState.Complete && tryCount != 5)
                {
                    Application.DoEvents();
                    Thread.Sleep(3000);
                    Application.DoEvents();
                    tryCount++;
                }
                if (tryCount == 5)
                {
                    count++;
                    isWorking = true;
                    Email.SendEmailFailure("Flight data failure",
                        $"Failed at {count}/{destinations.Count} - {destinations[i].DestinationAirportCode} after {tryCount} attempts.");
                    Console.WriteLine($"Failed at {count}/{destinations.Count} after {tryCount} attempts");
                    failureCount++;
                    tryCount = 0;
                }
                else if (wb.Document.GetElementsByTagName("tbody").Count > 0)
                {
                    isWorking = true;
                    tryCount = 0;
                    text = wb.Document.GetElementsByTagName("tbody")[0].InnerText.Substring(5, 7);
                    //note: if the 7-character window contains no space, price keeps its previous value
                    if (text.Contains(" "))
                    {
                        int limit = text.IndexOf(" ");
                        text = wb.Document.GetElementsByTagName("tbody")[0].InnerText.Substring(5, limit);
                        price = Decimal.Parse(text);
                    }
                    prices.Add(price);
                    count++;
                    Console.WriteLine($"{count}/{destinations.Count} Done.");
                    //send a progress email once the run has been going 30 minutes or more;
                    //comparing elapsed TotalMinutes avoids the wrap-around of DateTime.Minute
                    if ((DateAndTime.CurrentDateTime() - now).TotalMinutes >= 30)
                    {
                        Email.SendEmail($"Flight data update at {DateAndTime.CurrentDateTime()}",
                            $"SurfScraper has gotten {Percent.DataAvailableVsPercentScraped(count, failureCount, 50)}% of all flight data so far.");
                    }
                }
            }
        }
    }
    Email.SendEmail("Flight data overview",
        $"SurfScraper got {Percent.DataAvailableVsPercentScraped(count, failureCount, 50)}% of all flight data.");
    return prices;
}
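//Percent.DataAvailableVsPercentScraped(count, failureCount, total) lives elsewhere in the
//project. Judging by how it is called, it presumably reports successful scrapes as a
//percentage of the 50 attempted destinations. A plausible sketch (assumed, not the actual
//implementation):
//
//  public static class Percent
//  {
//      public static decimal DataAvailableVsPercentScraped(int count, int failureCount, int total)
//      {
//          //successful scrapes are the attempts made so far minus the failures
//          return Math.Round((decimal)(count - failureCount) / total * 100, 2);
//      }
//  }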
//previous per-time-node version of LogSurfData, kept for reference
//public void LogSurfData()
//{
//    //these counters all work together to allow for effective scraping
//    //total log count
//    int totalLogCount = 0;
//    //total log spot start count
//    int totalLogPerSpotStartCount = 8;
//    //spot log count
//    int totalLogPerSpotCount = 0;
//    //day count
//    int dayCount = 0;
//    //time count
//    int timeCount = 0;
//    //load list of date times
//    List<TimeSpan> times = Utility.LoadTimes();
//    //load list of spots
//    SurfHeightSql surfSql = new SurfHeightSql(connectionString);
//    List<SurfUrl> spots = surfSql.LoadSpotsToScrape();
//    //load list of surf heights
//    Surf surf = new Surf();
//    List<decimal> surfHeight = surf.ScrapeSurfHeight();
//    try
//    {
//        using (SqlConnection conn = new SqlConnection(connectionString))
//        {
//            conn.Open();
//            foreach (var item in spots)
//            {
//                //there are 79 nodes of data to scrape per page
//                //(10 days and 8 forecast nodes per day) except
//                //for last day which has 7 nodes
//                while (totalLogPerSpotCount < 79)
//                {
//                    //this will prevent the scraping from breaking by trying to
//                    //scrape the "extra" 8th node on the 10th forecast day
//                    //instead it will only scrape 7
//                    if (totalLogPerSpotCount == 72)
//                    {
//                        timeCount = 0;
//                        for (int i = totalLogCount; i < totalLogPerSpotStartCount - 1; i++)
//                        {
//                            SqlCommand cmd = new SqlCommand(SQL_WriteSurfInfo, conn);
//                            cmd.Parameters.AddWithValue("@waveHeight", surfHeight[i]);
//                            cmd.Parameters.AddWithValue("@logDate", Utility.CurrentDateTime());
//                            cmd.Parameters.AddWithValue("@locationId", item.LocationId);
//                            cmd.Parameters.AddWithValue("@spotName", item.SpotName);
//                            cmd.Parameters.AddWithValue("@forecastForDate", Utility.CurrentDateTime().AddDays(dayCount));
//                            cmd.Parameters.AddWithValue("@ForecastForTime", times[timeCount]);
//                            cmd.Parameters.AddWithValue("@spotId", item.SpotId);
//                            cmd.ExecuteNonQuery();
//                            totalLogPerSpotCount++;
//                            timeCount++;
//                        }
//                    }
//                    //this is the same as above but for forecast days 1-9 which have all 8 nodes
//                    else
//                    {
//                        timeCount = 0;
//                        for (int i = totalLogCount; i < totalLogPerSpotStartCount; i++)
//                        {
//                            SqlCommand cmd = new SqlCommand(SQL_WriteSurfInfo, conn);
//                            cmd.Parameters.AddWithValue("@waveHeight", surfHeight[i]);
//                            cmd.Parameters.AddWithValue("@logDate", Utility.CurrentDateTime());
//                            cmd.Parameters.AddWithValue("@locationId", item.LocationId);
//                            cmd.Parameters.AddWithValue("@spotName", item.SpotName);
//                            cmd.Parameters.AddWithValue("@forecastForDate", Utility.CurrentDateTime().AddDays(dayCount));
//                            cmd.Parameters.AddWithValue("@ForecastForTime", times[timeCount].ToString());
//                            cmd.Parameters.AddWithValue("@spotId", item.SpotId);
//                            cmd.ExecuteNonQuery();
//                            totalLogPerSpotCount++;
//                            timeCount++;
//                        }
//                    }
//                    dayCount++;
//                    totalLogCount += 7;
//                    totalLogPerSpotStartCount += 7;
//                }
//                totalLogPerSpotCount = 0;
//                dayCount = 0;
//            }
//        }
//    }
//    catch (SqlException ex)
//    {
//        throw;
//    }
//}

//log one averaged surf-height row per spot per forecast day
public void LogSurfData()
{
    //these counters all work together to allow for effective scraping
    //index of the next unread node in the flat surfHeight list
    int totalLogCount = 0;
    //day count
    int dayCount = 0;
    //failure count for logging
    int failureCount = 0;
    //variable to hold the average surf height per day for each spot
    decimal averageSurfHeight = 0;
    //variable to hold the average wind direction per day for each spot
    decimal averageWindDirection = 0;
    //the per-time forecast list is no longer needed now that heights are averaged per day
    //List<TimeSpan> times = DateAndTime.LoadTimes();
    //load list of spots
    SurfHeightSql surfSql = new SurfHeightSql(connectionString);
    List<SurfUrl> spots = surfSql.LoadSpotsToScrape();
    //load list of surf heights
    Surf surf = new Surf();
    List<decimal> surfHeight = surf.ScrapeSurfHeight(connectionString);
    //List<int> windDirection = surf.ScrapeWindDirection(connectionString);
    try
    {
        using (SqlConnection conn = new SqlConnection(connectionString))
        {
            conn.Open();
            foreach (var item in spots)
            {
                //there are 79 nodes of data to scrape per page
                //(10 days and 8 forecast nodes per day) except
                //for the last day, which has 7 nodes
                while (dayCount < 10)
                {
                    //only average 7 nodes on the 10th forecast day so the scrape
                    //doesn't break by reading an "extra" 8th node that isn't there
                    int nodes = (dayCount == 9) ? 7 : 8;
                    for (int i = totalLogCount; i < totalLogCount + nodes; i++)
                    {
                        averageSurfHeight += surfHeight[i];
                    }
                    averageSurfHeight /= nodes;
                    //for (int i = totalLogCount; i < totalLogCount + nodes; i++)
                    //{
                    //    averageWindDirection += windDirection[i];
                    //}
                    //averageWindDirection /= nodes;
                    try
                    {
                        using (SqlCommand cmd = new SqlCommand(SQL_WriteSurfInfo, conn))
                        {
                            cmd.Parameters.AddWithValue("@swellHeight", averageSurfHeight);
                            cmd.Parameters.AddWithValue("@logDate", DateAndTime.CurrentDateTime());
                            cmd.Parameters.AddWithValue("@locationId", item.LocationId);
                            cmd.Parameters.AddWithValue("@spotName", item.SpotName);
                            cmd.Parameters.AddWithValue("@forecastForDate", DateAndTime.CurrentDateTime().AddDays(dayCount));
                            cmd.Parameters.AddWithValue("@spotId", item.SpotId);
                            cmd.Parameters.AddWithValue("@windDirection", Math.Round(averageWindDirection, 0));
                            cmd.ExecuteNonQuery();
                        }
                        Console.WriteLine("Inserted 1 row");
                    }
                    catch (Exception e)
                    {
                        Email.SendEmailFailure(e.ToString(), $"Surf data failure at {item} for day {dayCount}");
                        failureCount++;
                    }
                    averageSurfHeight = 0;
                    averageWindDirection = 0;
                    totalLogCount += nodes;
                    dayCount++;
                }
                dayCount = 0;
            }
        }
    }
    catch (SqlException)
    {
        throw;
    }
}
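//As with SQL_WriteFlightInfo, SQL_WriteSurfInfo is defined elsewhere in the class. From the
//parameters bound above it is presumably a parameterized INSERT roughly like the following
//(a sketch under that assumption; real table and column names may differ):
//
//  private const string SQL_WriteSurfInfo =
//      @"INSERT INTO SurfForecasts
//            (SwellHeight, LogDate, LocationId, SpotName, ForecastForDate, SpotId, WindDirection)
//        VALUES
//            (@swellHeight, @logDate, @locationId, @spotName, @forecastForDate, @spotId, @windDirection);";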