/// <summary>
/// Checks whether the entity's URL is an absolute HTTP or HTTPS URI.
/// </summary>
/// <param name="entity">Record whose <c>Url</c> value is validated.</param>
/// <returns>
/// <c>true</c> when <c>entity.Url</c> parses as an absolute URI whose scheme is
/// http or https; otherwise <c>false</c>.
/// </returns>
private bool IsValidURL(ScrapeData entity)
{
    Uri uriResult;

    // The scheme comparison must be grouped: && binds tighter than ||, so without the
    // parentheses a failed TryCreate still evaluated uriResult.Scheme on a null Uri.
    return Uri.TryCreate(entity.Url, UriKind.Absolute, out uriResult)
        && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
}
/// <summary>
/// Runs the main regular expression over <c>scrapeData.Data</c> and collects text from each match.
/// </summary>
/// <param name="scrapeData">Carries the input text, the main pattern and its options, and the optional part patterns.</param>
/// <returns>
/// When no parts are configured, the full text of every match. Otherwise, for every match and
/// every part whose pattern succeeds against the match text, the value of capture group 1.
/// </returns>
public List<string> Scrape(ScrapeData scrapeData)
{
    var results = new List<string>();

    foreach (Match hit in Regex.Matches(scrapeData.Data, scrapeData.Regex, scrapeData.RegexOption))
    {
        string wholeMatch = hit.Groups[0].Value;

        if (scrapeData.Parts.Any())
        {
            // Each part pattern is applied to the text of the outer match.
            foreach (var part in scrapeData.Parts)
            {
                Match partHit = Regex.Match(wholeMatch, part.Regex, part.RegexOption);
                if (partHit.Success)
                {
                    results.Add(partHit.Groups[1].Value);
                }
            }

            continue;
        }

        // No parts configured: add the whole matched value to the list.
        results.Add(wholeMatch);
    }

    return results;
}
// This method gets called by the runtime. Use this method to configure the HTTP request pipeline.
/// <summary>
/// Builds the request pipeline: HTTPS redirection, Swagger (UI served at the site root),
/// routing, CORS, authorization and controller endpoints, then calls <c>ScrapeData.Scrape(app)</c>.
/// </summary>
/// <param name="app">Application builder the middleware is registered on.</param>
/// <param name="env">Hosting environment, used to decide whether the developer exception page is enabled.</param>
public void Configure(IApplicationBuilder app, IWebHostEnvironment env)
{
    // The developer exception page is registered for Development only. The original also
    // called it unconditionally, which exposed detailed exception output in every environment.
    if (env.IsDevelopment())
    {
        app.UseDeveloperExceptionPage();
    }

    app.UseHttpsRedirection();

    app.UseSwagger();
    app.UseSwaggerUI(c =>
    {
        c.SwaggerEndpoint("/swagger/v1/swagger.json", "My API V1");
        // An empty prefix serves the Swagger UI from the application root.
        c.RoutePrefix = string.Empty;
    });

    app.UseRouting();
    app.UseCors();
    app.UseAuthorization();

    app.UseEndpoints(endpoints =>
    {
        endpoints.MapControllers();
    });

    ScrapeData.Scrape(app);
}
/// <summary>
/// Updates the <c>AllScrapedTillDate</c> flag stored in <c>collections.json</c> (in the current
/// working directory) and rewrites the file. Does nothing when the file is missing, empty,
/// or does not deserialize to a <c>ScrapeData</c> object.
/// </summary>
/// <param name="status">Value written to <c>AllScrapedTillDate</c>.</param>
public static void FlagScrapeStatusToJson(bool status)
{
    string collectionHistoryPath = Path.Combine(Directory.GetCurrentDirectory(), "collections.json");
    if (!File.Exists(collectionHistoryPath))
    {
        return;
    }

    string json = File.ReadAllText(collectionHistoryPath);
    if (string.IsNullOrEmpty(json))
    {
        return;
    }

    ScrapeData jdata = JsonConvert.DeserializeObject<ScrapeData>(json);
    if (jdata == null)
    {
        // Whitespace-only or literal "null" content deserializes to null; previously this
        // surfaced as a NullReferenceException on the assignment below.
        return;
    }

    jdata.AllScrapedTillDate = status;
    File.WriteAllText(collectionHistoryPath, JsonConvert.SerializeObject(jdata, Formatting.Indented));

    Console.ForegroundColor = ConsoleColor.Green;
    Console.WriteLine("Got all informations till date. Flagging as COMPLETED ");
    Console.ResetColor();

    // Pause so the message stays visible before the caller continues.
    Thread.Sleep(TimeSpan.FromSeconds(3));
}
/// <summary>
/// Persists scrape results to <c>collections.json</c> in the current working directory.
/// </summary>
/// <remarks>
/// When the file exists and <c>GetScrapeDataFromJSONRecordFile()</c> returns a record, the new
/// reports are appended to that record and its status fields are overwritten from
/// <paramref name="data"/>. In every other case <paramref name="data"/> itself is written.
/// </remarks>
/// <param name="data">Freshly scraped data to store or merge.</param>
public static void WriteToJson(ScrapeData data)
{
    string historyFile = Path.Combine(Directory.GetCurrentDirectory(), "collections.json");

    // Defaults to the incoming data; replaced by the merged record when one can be loaded.
    ScrapeData toPersist = data;

    if (File.Exists(historyFile))
    {
        ScrapeData stored = GetScrapeDataFromJSONRecordFile();
        if (stored != null)
        {
            foreach (var report in data.Reports)
            {
                stored.Reports.Add(report);
            }

            stored.AllScrapedTillDate = data.AllScrapedTillDate;
            stored.LastScraped = data.LastScraped;
            stored.LastScrapedDatePickerTime = data.LastScrapedDatePickerTime;
            toPersist = stored;
        }
    }

    File.WriteAllText(historyFile, JsonConvert.SerializeObject(toPersist, Formatting.Indented));
}
/// <summary>
/// Converts report table rows into <c>Report</c> entries and hands them to <c>WriteToJson</c>.
/// Always sleeps for one second before returning, even when there are no rows.
/// </summary>
/// <param name="rows">
/// Table rows; columns 0, 1, 3, 8, 10 and 11 are read as parent ASIN, child ASIN, sessions,
/// units ordered, product sales and total order items respectively.
/// </param>
/// <param name="date">Report date stamped on every entry and stored as the last date-picker time.</param>
/// <exception cref="System.FormatException">A numeric column does not contain a parsable number after sanitising.</exception>
private static void ScrapeDataTable(List<List<string>> rows, DateTime date)
{
    if (rows.Count > 0)
    {
        ScrapeData data = new ScrapeData();

        foreach (var cols in rows)
        {
            Report report = new Report();
            report.ParentASIN = cols[0];
            report.ChildASIN = cols[1];
            report.Sessions = int.Parse(Regex.Replace(cols[3], @"[^0-9a-zA-Z]+", ""));
            report.UnitsOrdered = int.Parse(Regex.Replace(cols[8], @"[^0-9a-zA-Z]+", ""));

            // Keep the decimal point for the money column. Stripping it (as the integer
            // columns do) turned a value such as 12.34 into 1234. Invariant culture makes
            // '.' the decimal separator regardless of the machine's regional settings.
            // NOTE(review): assumes the report uses '.' as decimal separator — confirm.
            report.ProductSales = decimal.Parse(
                Regex.Replace(cols[10], @"[^0-9a-zA-Z.]+", ""),
                System.Globalization.CultureInfo.InvariantCulture);

            report.TotalOrderItems = int.Parse(Regex.Replace(cols[11], @"[^0-9a-zA-Z]+", ""));
            report.Date = date;
            data.Reports.Add(report);
        }

        // Set once per batch; these were previously reassigned on every row.
        data.LastScraped = DateTime.Now;
        data.LastScrapedDatePickerTime = date;

        WriteToJson(data);
    }

    Thread.Sleep(1000);
}
/// <summary>
/// Starts the artist, venue and date scrapes against <c>UrlConnection.BaseUrl</c>, waits for
/// all three, and calls <c>Display()</c> on every returned item. Any exception raised while
/// awaiting or displaying is written to the console using <c>Message.HTMLPageError</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static async Task Main(string[] args)
{
    var url = UrlConnection.BaseUrl;

    var tasks = new List<Task<List<SeeTicketUserDataSet>>>
    {
        ScrapeData.ScrapeArtistNames(url),
        ScrapeData.ScrapeVenuesScript(url),
        ScrapeData.ScrapeDatesScript(url)
    };

    try
    {
        List<SeeTicketUserDataSet>[] resultSets = await Task.WhenAll(tasks);

        foreach (var resultSet in resultSets)
        {
            foreach (SeeTicketUserDataSet item in resultSet)
            {
                item.Display();
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(Message.HTMLPageError, ex.ToString());
    }
}
/// <summary>
/// Reports whether the collection holds no document whose <c>Url</c> equals the entity's URL.
/// </summary>
/// <param name="entity">Record whose <c>Url</c> is looked up.</param>
/// <returns><c>true</c> when no document with the same URL is found; otherwise <c>false</c>.</returns>
private bool IsNewData(ScrapeData entity)
{
    var sameUrl = Builders<ScrapeData>.Filter.Eq("Url", entity.Url);

    // Limit(1): a single hit is enough to decide, so the count stops at one.
    long hits = collection.Find<ScrapeData>(sameUrl).Limit(1).CountDocuments();

    return hits == 0;
}
/// <summary>
/// Creates a <c>ScrapeData</c> populated from the builder's current data, pattern,
/// regex options and parts.
/// </summary>
/// <returns>A new <c>ScrapeData</c> instance.</returns>
public ScrapeData Build()
{
    return new ScrapeData
    {
        Data = _data,
        Regex = _regex,
        RegexOption = _regexOption,
        Parts = _part
    };
}
/// <summary>
/// Loads every playground link and pairs the playground read from the page with the
/// scrape record built from the link and the page's outer HTML.
/// </summary>
/// <returns>One (playground, scrape data) tuple per link, in link order.</returns>
public IEnumerable<Tuple<Playground, ScrapeData>> GetPlaygrounds()
{
    var results = new List<Tuple<Playground, ScrapeData>>();

    foreach (var link in GetAllPlaygroundLinks())
    {
        var document = GetDocumentFromServer(link);

        var entry = new Tuple<Playground, ScrapeData>(
            GetPlayground(document, link),
            new ScrapeData(link, document.DocumentNode.OuterHtml));

        results.Add(entry);
    }

    return results;
}
/// <summary>
/// Posts the scrape data as JSON to <c>api/AddSalesCentralScrapeData</c> under <c>UnicorpURI</c>.
/// </summary>
/// <param name="data">Payload serialized with Json.NET and sent as <c>application/json</c> (UTF-8).</param>
/// <returns>The response body on a success status code; an empty string otherwise.</returns>
/// <remarks>
/// The request is awaited instead of blocked on with <c>.Result</c>, so transport failures now
/// surface as the underlying exception (for example <c>HttpRequestException</c>) rather than
/// wrapped in an <c>AggregateException</c>.
/// </remarks>
public static async Task<string> AddProductToUnicorpAsync(ScrapeData data)
{
    using (var client = new HttpClient())
    {
        client.BaseAddress = new Uri(UnicorpURI);
        client.DefaultRequestHeaders.Accept.Clear();
        client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

        using (var content = new StringContent(JsonConvert.SerializeObject(data), Encoding.UTF8, "application/json"))
        using (var response = await client.PostAsync("api/AddSalesCentralScrapeData", content))
        {
            if (response.IsSuccessStatusCode)
            {
                return await response.Content.ReadAsStringAsync();
            }
        }
    }

    return "";
}
/// <summary>
/// Loads the configured page and builds a <c>ScrapeData</c> entry for every selected node
/// that has both inner text and a non-empty value for the configured attribute.
/// </summary>
/// <returns>
/// The collected entries, each stamped with the current local time. Empty when the
/// target-node query selects nothing.
/// </returns>
public List<ScrapeData> GetTheMorningDewData()
{
    var url = theMorningDewSettings.Url;
    var htmlDoc = web.Load(url);
    var nodes = htmlDoc.DocumentNode.SelectNodes(theMorningDewSettings.TargetNode);
    var scrapedData = new List<ScrapeData>();

    // SelectNodes can hand back null instead of an empty collection when nothing matches
    // (Html Agility Pack behaviour, presumably the library in use here); iterating it
    // directly would throw.
    if (nodes == null)
    {
        return scrapedData;
    }

    foreach (var node in nodes)
    {
        // Read the attribute once and reuse it for both the check and the entry.
        var attributeValue = node.GetAttributeValue(theMorningDewSettings.AttributeName, String.Empty);

        if (String.IsNullOrEmpty(node.InnerText) || String.IsNullOrEmpty(attributeValue))
        {
            //log
            continue;
        }

        scrapedData.Add(new ScrapeData(node.InnerText, attributeValue, DateTime.Now));
    }

    return scrapedData;
}
/// <summary>
/// Stores scraped reports in the Amazon database: one row per new child ASIN, one row per new
/// report date, and per-report session, units-ordered, sales and total-order-items rows.
/// </summary>
/// <param name="data">Scrape record whose <c>Reports</c> are stored.</param>
/// <returns>
/// A status message: "New Informations Added To Database" on success;
/// "Already have informations till date. Not adding to database." when the database already
/// holds a date at least as recent as every report; "Error Creating Database",
/// "Error Storing Data" or "Error Connecting To Database" on failure.
/// </returns>
/// <remarks>
/// When the database already contains dates, only reports dated after the latest stored date
/// are inserted, and only child ASINs that are not stored yet are added, so re-running with an
/// accumulated history does not duplicate earlier rows.
/// </remarks>
public static string AddProductToDB(ScrapeData data)
{
    try
    {
        using (AmazonDBContext amazonDBContext = new AmazonDBContext())
        {
            try
            {
                // EnsureCreated() returns false when the database already exists, so its
                // result must not be treated as a failure signal; the original did, which
                // made every run after the first report "Error Creating Database".
                amazonDBContext.Database.EnsureCreated();
            }
            catch
            {
                return "Error Creating Database";
            }

            if (!amazonDBContext.Database.CanConnect())
            {
                return "Error Storing Data";
            }

            var reportsToStore = data.Reports.ToList();

            if (amazonDBContext.AvailableProductInfoOfDates.Any())
            {
                var lastCollectionDateFromDB = amazonDBContext.AvailableProductInfoOfDates
                    .OrderByDescending(s => s.DatePickerDate)
                    .First();

                // Keep only what is newer than the latest date already stored.
                reportsToStore = reportsToStore
                    .Where(r => r.Date > lastCollectionDateFromDB.DatePickerDate)
                    .ToList();

                if (reportsToStore.Count == 0)
                {
                    return "Already have informations till date. Not adding to database.";
                }
            }

            // Child ASINs already stored must not be inserted a second time.
            var knownChildAsins = amazonDBContext.UniqueProductASINs
                .Select(s => s.ChildAsinID)
                .ToList();

            var uniqueProductASINs = reportsToStore
                .GroupBy(s => s.ChildASIN)
                .Where(g => !knownChildAsins.Contains(g.Key))
                .Select(g => new UniqueProductASIN { ChildAsinID = g.Key })
                .ToList();

            var availableProductInfoOfDates = reportsToStore
                .GroupBy(s => s.Date)
                .Select(g => new AvailableProductInfoOfDate { DatePickerDate = g.Key })
                .ToList();

            foreach (var product in uniqueProductASINs)
            {
                amazonDBContext.UniqueProductASINs.Add(product);
            }

            foreach (var infoOfDate in availableProductInfoOfDates)
            {
                amazonDBContext.AvailableProductInfoOfDates.Add(infoOfDate);
            }

            // Save first so the ASIN and date rows have ids the detail rows can reference.
            amazonDBContext.SaveChanges();

            foreach (var report in reportsToStore)
            {
                // Resolve both ids once per report and reuse them for all four detail rows.
                var childAsinId = amazonDBContext.UniqueProductASINs
                    .FirstOrDefault(s => s.ChildAsinID == report.ChildASIN).Id;
                var dateId = amazonDBContext.AvailableProductInfoOfDates
                    .FirstOrDefault(s => s.DatePickerDate == report.Date).Id;

                amazonDBContext.ChildASINSessions.Add(new ChildASINSession
                {
                    ChildASINId = childAsinId,
                    DateID = dateId,
                    SessionValue = report.Sessions
                });

                amazonDBContext.UnitsOrderedByASINIDs.Add(new UnitsOrderedByASINID
                {
                    ChildASINId = childAsinId,
                    DateID = dateId,
                    UnitsOrdered = report.UnitsOrdered
                });

                amazonDBContext.ProductSalesByChildASINIDs.Add(new ProductSalesByChildASINID
                {
                    ChildASINId = childAsinId,
                    DateID = dateId,
                    Earning = report.ProductSales
                });

                amazonDBContext.TotalOrderItemsByASINIDs.Add(new TotalOrderItemsByASINID
                {
                    ChildASINId = childAsinId,
                    DateID = dateId,
                    // Previously filled from UnitsOrdered, which duplicated the units table.
                    TotalOrders = report.TotalOrderItems
                });
            }

            amazonDBContext.SaveChanges();
            return "New Informations Added To Database";
        }
    }
    catch
    {
        // Any failure (including a missing ASIN or date row above) is reported with
        // this message.
        return "Error Connecting To Database";
    }
}
/// <summary>
/// Console entry point: reads configuration, signs in to Seller Central through ChromeDriver
/// (prompting for credentials, captcha and OTP when needed), downloads one daily report per day
/// from the last scraped date up to now, flags the JSON history as complete, then stores the
/// history in the database and generates the Excel document. The process always ends with
/// exit code 0 once the flow completes.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // ---- Read configuration from the JSON files ----
    var options = new ChromeOptions();
    ChromeDriverService service = ChromeDriverService.CreateDefaultService();
    service.SuppressInitialDiagnosticInformation = true;

    IConfiguration configuration = GetAppConfig();
    var section = configuration.GetSection("UserAuth");
    var emailFromConfig = section.GetValue<string>("Email");
    var passwordFromConfig = section.GetValue<string>("Password");

    IConfiguration scrapeInfo = GetScrapeInfo();
    DateTime lastScrapedDateTime = DateTime.MinValue;
    try
    {
        if (!string.IsNullOrEmpty(scrapeInfo.GetValue<string>("LastScrapedDatePickerTime")))
        {
            lastScrapedDateTime = scrapeInfo.GetValue<DateTime>("LastScrapedDatePickerTime");
        }
        else
        {
            lastScrapedDateTime = GetDateTime("14", "August", "2018");
        }
    }
    catch
    {
        // An unreadable stored value falls back to the same default start date.
        lastScrapedDateTime = GetDateTime("14", "August", "2018");
    }

    // Configured credentials are used only when both are present.
    if (!string.IsNullOrEmpty(emailFromConfig) && !string.IsNullOrEmpty(passwordFromConfig))
    {
        Email = emailFromConfig;
        Password = passwordFromConfig;
    }

    UnicorpURI = configuration.GetValue<string>("UnicorpURI");

    using (var driver = new ChromeDriver(service, options))
    {
        driver.Navigate().GoToUrl(baseUrl);

        // ---- Open the sign-in page: CSS selector first, then element id, then XPath ----
        try
        {
            var signInButton = driver.FindElementByCssSelector("#wp-content > div.as-body.desktop > div.border-color-squid-ink.flex-container.flex-align-items-stretch.flex-align-content-flex-start.flex-full-width.amsg-2018.fonts-loaded.border-color-squid-ink.design-Sell > div > div > div.background-color-aqua.border-color-mermaid.padding-left-xxlarge.padding-right-xxlarge.padding-top-xsmall.padding-bottom-xsmall.flex-container.flex-align-items-center.flex-align-content-flex-start.flex-full-width.amsg-2018.fonts-loaded.border-color-mermaid.design-Sell > div > div.border-color-squid-ink.flex-container.flex-align-items-center.flex-align-content-flex-start.amsg-2018.fonts-loaded.border-color-squid-ink.design-Sell > div:nth-child(1) > div.border-color-squid-ink.padding-right-xsmall.flex-container.flex-align-items-stretch.flex-align-content-flex-start.flex-full-width.amsg-2018.fonts-loaded.border-color-squid-ink.design-Sell > div > a > strong");
            signInButton.Click();
        }
        catch (Exception e)
        {
            try
            {
                driver.FindElementById("sign-in-button").Click();
            }
            catch
            {
                try
                {
                    var signInButton = driver.FindElementByXPath("/html/body/div/div[1]/div/div/div[1]/div[1]/div/div/div[2]/div/div[2]/div[1]/div[1]/div/a/strong");
                    signInButton.Click();
                }
                catch
                {
                    // All three lookups failed: report the first failure and carry on.
                    Console.WriteLine(e.Message);
                }
            }
        }

        // ---- Ask for credentials that were not supplied by configuration ----
        // The original wrapped each prompt in a retry loop that caught ArgumentException;
        // the prompts perform no validation, so the reads are done directly.
        if (string.IsNullOrEmpty(Email) || string.IsNullOrEmpty(Password))
        {
            Console.WriteLine();
            Console.WriteLine("Please enter your Email:");
            Email = Console.ReadLine();

            Console.WriteLine();
            Console.WriteLine("Please enter your Password:");
            Password = ReadMaskedPassword();
        }

        driver.FindElementByName("email").SendKeys(Email);
        driver.FindElementByName("password").SendKeys(Password);
        driver.FindElementById("signInSubmit").Click();

        // ---- Captcha (only when the captcha box is present) ----
        try
        {
            var captchaBox = driver.FindElement(By.Id("auth-captcha-guess"));
            if (captchaBox != null)
            {
                captchaRaised = true;

                // The password has to be entered again together with the captcha.
                driver.FindElementByName("password").SendKeys(Password);

                Console.WriteLine();
                Console.Write("Enter Captcha -- ");
                Captcha = Console.ReadLine();

                captchaBox.SendKeys(Captcha);
                driver.FindElementByCssSelector("#a-autoid-0").Click();
                captchaDone = true;
            }
        }
        catch (Exception)
        {
            // The lookup throws when no captcha was raised; nothing to do in that case.
        }

        // ---- OTP (only when the MFA form is present) ----
        try
        {
            driver.FindElementById("auth-mfa-form");

            Console.WriteLine();
            Console.Write("Enter OTP -- ");
            OTP = Console.ReadLine();

            driver.FindElementById("auth-mfa-otpcode").SendKeys(OTP);
            driver.FindElementById("auth-signin-button").Click();
            otpDone = true;
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
        }

        bool wrongOtp = false;
        try
        {
            wrongOtp = driver.FindElement(By.Id("auth-error-message-box")) != null;
        }
        catch (Exception)
        {
            // No error box on the page.
        }

        if (wrongOtp)
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("Wrong OTP entered. Please close and restart the process ..");

            // Environment.Exit does not run the using block's disposal, so the browser
            // session is closed explicitly before exiting.
            driver.Quit();
            Environment.Exit(0);
        }

        // ---- Download one report per day up to the current time ----
        var scrapeToDate = DateTime.Now;
        while (lastScrapedDateTime < scrapeToDate)
        {
            // Invariant culture keeps '/' as the separator; with the current culture the
            // "/" specifier is replaced by the regional date separator.
            string date = lastScrapedDateTime.ToString("MM/dd/yyyy", System.Globalization.CultureInfo.InvariantCulture);

            // "&currentPage" restores the parameter that was mis-encoded as "¤tPage".
            var url = "https://sellercentral.amazon.com/gp/site-metrics/load-report-JSON.html/ref=au_xx_cont_sitereport?sortColumn=12&filterFromDate=" + date + "&filterToDate=" + date + "&fromDate=" + date + "&toDate=" + date + "&cols=/c0/c1/c2/c3/c4/c5/c6/c7/c8/c9/c10/c11&reportID=102:DetailSalesTrafficByChildItem&sortIsAscending=0&currentPage=0&dateUnit=1&viewDateUnits=ALL&runDate=";
            driver.Navigate().GoToUrl(url);

            try
            {
                var json = driver.FindElementById("sc-content-container").Text;
                var deserializedJSON = JsonConvert.DeserializeObject<ReportJSON>(json);
                var rows = deserializedJSON.data.rows;
                ScrapeDataTable(rows, lastScrapedDateTime);
            }
            catch (Exception ex)
            {
                // A failed day is still skipped, but no longer silently.
                Console.WriteLine("Could not scrape report for " + date + ": " + ex.Message);
            }

            lastScrapedDateTime = lastScrapedDateTime.AddDays(1);
        }

        FlagScrapeStatusToJson(true);

        // ---- Send scraped data to the database and generate the Excel document ----
        try
        {
            ScrapeData jdata = GetScrapeDataFromJSONRecordFile();
            if (jdata == null)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Invalid JSON Records. Exitting ...");
                Console.ResetColor();
                Thread.Sleep(TimeSpan.FromSeconds(5));
            }
            else if (!jdata.AllScrapedTillDate)
            {
                Console.ForegroundColor = ConsoleColor.Blue;
                Console.WriteLine("Not All Records Scraped Till Date ! Exitting ...");
                Console.ResetColor();
                Thread.Sleep(TimeSpan.FromSeconds(5));
            }
            else
            {
                Console.ForegroundColor = ConsoleColor.Green;
                Console.WriteLine("Storing about " + jdata.Reports.Count + " Records, Please wait ..");
                string result = AddProductToDB(jdata);
                Console.WriteLine(jdata.Reports.Count + " Records stored to AmazonDB.db !");
                Console.WriteLine(result);
                Console.WriteLine();
                Console.WriteLine("Generating Excel Document ...");
                GenereateExcel();
                Console.WriteLine("Done !");
                Thread.Sleep(TimeSpan.FromSeconds(2));
            }
        }
        catch (Exception)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine("UnicorpLTD is not live, Not sending the scraped informations.");
            Console.ResetColor();
            Thread.Sleep(TimeSpan.FromSeconds(5));
        }
    }

    // Every branch above used to call Environment.Exit(0) from inside the using block,
    // which skipped disposal of the driver. Exiting here keeps the same exit code while
    // letting the using block shut the browser down first.
    Environment.Exit(0);
}

/// <summary>
/// Reads a password from the console without echoing it: prints '*' per typed key,
/// supports Backspace, and finishes on Enter.
/// </summary>
/// <returns>The characters typed before Enter.</returns>
private static string ReadMaskedPassword()
{
    string unmaskedPass = "";

    while (true)
    {
        ConsoleKeyInfo key = Console.ReadKey(true);

        if (key.Key != ConsoleKey.Backspace && key.Key != ConsoleKey.Enter)
        {
            unmaskedPass += key.KeyChar;
            Console.Write("*");
        }
        else if (key.Key == ConsoleKey.Backspace && unmaskedPass.Length > 0)
        {
            // Drop the last character and erase its '*' from the console.
            unmaskedPass = unmaskedPass.Substring(0, unmaskedPass.Length - 1);
            Console.Write("\b \b");
        }
        else if (key.Key == ConsoleKey.Enter)
        {
            break;
        }
    }

    return unmaskedPass;
}
/// <summary>
/// Creates a service for the given link and scrape target.
/// </summary>
/// <param name="link">Value stored in <c>Link</c>.</param>
/// <param name="scrapeDataFor">Value stored in <c>ScrapeDataFor</c>.</param>
public ScrapeInfoService(string link, ScrapeData scrapeDataFor)
{
    Link = link;
    ScrapeDataFor = scrapeDataFor;
}