Пример #1
0
        /// <summary>
        /// Checks that the entity's URL is an absolute URI using the http or https scheme.
        /// </summary>
        /// <param name="entity">Scrape record whose <c>Url</c> is validated.</param>
        /// <returns>
        /// <c>true</c> when <c>entity.Url</c> parses as an absolute URI whose scheme is
        /// http or https; otherwise <c>false</c>.
        /// </returns>
        private bool IsValidURL(ScrapeData entity)
        {
            Uri uriResult;

            // Return early on a parse failure: uriResult is null in that case, and the
            // previous "a && b || c" expression still evaluated uriResult.Scheme for the
            // https comparison, throwing instead of reporting an invalid URL.
            if (!Uri.TryCreate(entity.Url, UriKind.Absolute, out uriResult))
            {
                return(false);
            }

            return(uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
        }
Пример #2
0
        /// <summary>
        /// Runs the main regex over <c>scrapeData.Data</c> and collects the scraped strings.
        /// </summary>
        /// <param name="scrapeData">Input text, main pattern, options and optional part patterns.</param>
        /// <returns>
        /// For every main match: the whole match when no parts are configured; otherwise
        /// capture group 1 of each part pattern that matches inside the main match.
        /// </returns>
        public List <string> Scrape(ScrapeData scrapeData)
        {
            var results = new List <string>();

            foreach (Match hit in Regex.Matches(scrapeData.Data, scrapeData.Regex, scrapeData.RegexOption))
            {
                // Text of the whole match (capture group 0).
                string wholeMatch = hit.Value;

                if (!scrapeData.Parts.Any())
                {
                    // No part patterns: add the whole match value to the list.
                    results.Add(wholeMatch);
                    continue;
                }

                foreach (var part in scrapeData.Parts)
                {
                    Match partHit = Regex.Match(wholeMatch, part.Regex, part.RegexOption);

                    if (!partHit.Success)
                    {
                        continue;
                    }

                    results.Add(partHit.Groups[1].Value);
                }
            }

            return(results);
        }
Пример #3
0
        /// <summary>
        /// Called by the runtime to configure the HTTP request pipeline: developer exception
        /// page (development only), HTTPS redirection, Swagger and Swagger UI served at the
        /// site root, routing, CORS, authorization and controller endpoints. Finally starts
        /// <c>ScrapeData.Scrape</c> with the application builder.
        /// </summary>
        /// <param name="app">Application builder the middleware is registered on.</param>
        /// <param name="env">Hosting environment used to detect development mode.</param>
        public void Configure(IApplicationBuilder app, IWebHostEnvironment env)
        {
            // The developer exception page renders exception details in the response, so it
            // is registered for development only. A second, unconditional registration used
            // to follow this block and made the environment check pointless.
            if (env.IsDevelopment())
            {
                app.UseDeveloperExceptionPage();
            }

            app.UseHttpsRedirection();
            app.UseSwagger();
            app.UseSwaggerUI(c =>
            {
                c.SwaggerEndpoint("/swagger/v1/swagger.json", "My API V1");
                // Empty prefix: the Swagger UI is served at the application root.
                c.RoutePrefix = string.Empty;
            });

            app.UseRouting();
            app.UseCors();
            app.UseAuthorization();

            app.UseEndpoints(endpoints =>
            {
                endpoints.MapControllers();
            });

            ScrapeData.Scrape(app);
        }
Пример #4
0
        /// <summary>
        /// Sets <c>AllScrapedTillDate</c> in the "collections.json" record file located in
        /// the current directory and rewrites the file. Does nothing when the file is
        /// missing, empty, or does not deserialize to a <c>ScrapeData</c> object.
        /// </summary>
        /// <param name="status">Value stored in <c>AllScrapedTillDate</c>.</param>
        public static void FlagScrapeStatusToJson(bool status)
        {
            string startupPath           = Directory.GetCurrentDirectory();
            string collectionHistoryPath = Path.Combine(startupPath, "collections.json");

            if (!File.Exists(collectionHistoryPath))
            {
                return;
            }

            string json;
            using (StreamReader r = new StreamReader(collectionHistoryPath))
            {
                json = r.ReadToEnd();
            }

            if (string.IsNullOrEmpty(json))
            {
                return;
            }

            ScrapeData jdata = JsonConvert.DeserializeObject <ScrapeData>(json);
            if (jdata == null)
            {
                // Whitespace-only content or a literal JSON null deserializes to null;
                // there is no record to flag, so leave the file untouched instead of
                // failing with a null dereference.
                return;
            }

            jdata.AllScrapedTillDate = status;
            var convertedJson = JsonConvert.SerializeObject(jdata, Formatting.Indented);
            File.WriteAllText(collectionHistoryPath, convertedJson);

            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("Got all informations till date. Flagging as COMPLETED ");
            Console.ResetColor();
            // Pause so the confirmation stays visible on the console.
            Thread.Sleep(TimeSpan.FromSeconds(3));
        }
Пример #5
0
        /// <summary>
        /// Persists scrape results to "collections.json" in the current directory. When the
        /// file already holds a readable record, the new reports are appended to it and its
        /// status/timestamp fields are overwritten from <paramref name="data"/>; otherwise
        /// <paramref name="data"/> itself is written.
        /// </summary>
        /// <param name="data">Freshly scraped data to store or merge.</param>
        public static void WriteToJson(ScrapeData data)
        {
            string historyFile = Path.Combine(Directory.GetCurrentDirectory(), "collections.json");

            // Only consult the stored record when the file is already there.
            ScrapeData stored = null;
            if (File.Exists(historyFile))
            {
                stored = GetScrapeDataFromJSONRecordFile();
            }

            ScrapeData toPersist = data;
            if (stored != null)
            {
                foreach (var report in data.Reports)
                {
                    stored.Reports.Add(report);
                }

                stored.AllScrapedTillDate        = data.AllScrapedTillDate;
                stored.LastScraped               = data.LastScraped;
                stored.LastScrapedDatePickerTime = data.LastScrapedDatePickerTime;
                toPersist = stored;
            }

            File.WriteAllText(historyFile, JsonConvert.SerializeObject(toPersist, Formatting.Indented));
        }
Пример #6
0
        /// <summary>
        /// Converts the raw report rows for one date into <c>Report</c> entries and hands
        /// them to <c>WriteToJson</c>. Always sleeps for one second before returning.
        /// </summary>
        /// <param name="rows">
        /// Table rows; columns 0, 1, 3, 8, 10 and 11 are read as parent ASIN, child ASIN,
        /// sessions, units ordered, product sales and total order items.
        /// </param>
        /// <param name="date">Report date stamped on every entry.</param>
        /// <exception cref="FormatException">A numeric column does not contain a parsable number.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A row has fewer than 12 columns.</exception>
        private static void ScrapeDataTable(List <List <string> > rows, DateTime date)
        {
            if (rows.Count > 0)
            {
                ScrapeData data = new ScrapeData();
                foreach (var cols in rows)
                {
                    Report report = new Report();
                    report.ParentASIN      = cols[0];
                    report.ChildASIN       = cols[1];
                    report.Sessions        = int.Parse(Regex.Replace(cols[3], @"[^0-9a-zA-Z]+", ""));
                    report.UnitsOrdered    = int.Parse(Regex.Replace(cols[8], @"[^0-9a-zA-Z]+", ""));
                    // Keep the decimal point: stripping every non-alphanumeric character
                    // turned a value such as "$1,234.56" into 123456. Parse with the
                    // invariant culture so '.' is read as the decimal separator on every
                    // machine. NOTE(review): assumes US-style amounts ("1,234.56") - confirm.
                    report.ProductSales    = decimal.Parse(
                        Regex.Replace(cols[10], @"[^0-9.]+", ""),
                        System.Globalization.CultureInfo.InvariantCulture);
                    report.TotalOrderItems = int.Parse(Regex.Replace(cols[11], @"[^0-9a-zA-Z]+", ""));
                    report.Date            = date;
                    data.Reports.Add(report);
                }

                // Batch-level stamps: set once rather than on every row.
                data.LastScraped               = DateTime.Now;
                data.LastScrapedDatePickerTime = date;

                WriteToJson(data);
            }

            Thread.Sleep(1000);
        }
        /// <summary>
        /// Starts the artist-name, venue and date scrapes against the base URL, waits for
        /// all three to finish and displays every returned item. Any exception raised while
        /// awaiting or displaying is written to the console using
        /// <c>Message.HTMLPageError</c> as the format string.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static async Task Main(string[] args)
        {
            var pending = new List <Task <List <SeeTicketUserDataSet> > >();
            var url     = UrlConnection.BaseUrl;

            // The scrapes are started outside the try block, in this order.
            pending.Add(ScrapeData.ScrapeArtistNames(url));
            pending.Add(ScrapeData.ScrapeVenuesScript(url));
            pending.Add(ScrapeData.ScrapeDatesScript(url));

            try
            {
                var batches = await Task.WhenAll(pending);

                foreach (var batch in batches)
                {
                    foreach (SeeTicketUserDataSet item in batch)
                    {
                        item.Display();
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(Message.HTMLPageError, ex.ToString());
            }
        }
Пример #8
0
        /// <summary>
        /// Tells whether the collection has no document whose "Url" field equals the
        /// entity's URL.
        /// </summary>
        /// <param name="entity">Scrape record whose <c>Url</c> is looked up.</param>
        /// <returns><c>true</c> when the limited count query returns zero; otherwise <c>false</c>.</returns>
        private bool IsNewData(ScrapeData entity)
        {
            var sameUrl = Builders <ScrapeData> .Filter.Eq("Url", entity.Url);

            // The query is limited to one document before counting.
            var hits = collection.Find <ScrapeData>(sameUrl).Limit(1).CountDocuments();

            return(hits == 0);
        }
Пример #9
0
        /// <summary>
        /// Creates a <c>ScrapeData</c> populated from the builder's stored data, regex,
        /// regex option and parts.
        /// </summary>
        /// <returns>A new <c>ScrapeData</c> instance.</returns>
        public ScrapeData Build()
        {
            return(new ScrapeData
            {
                Data        = _data,
                Regex       = _regex,
                RegexOption = _regexOption,
                Parts       = _part
            });
        }
Пример #10
0
        /// <summary>
        /// Downloads every playground page and pairs the parsed <c>Playground</c> with a
        /// <c>ScrapeData</c> built from the link and the page's outer HTML.
        /// </summary>
        /// <returns>One tuple per playground link, built eagerly in link order.</returns>
        public IEnumerable <Tuple <Playground, ScrapeData> > GetPlaygrounds()
        {
            var results = new List <Tuple <Playground, ScrapeData> >();

            foreach (var link in GetAllPlaygroundLinks())
            {
                var document = GetDocumentFromServer(link);

                // The playground is parsed first, then the raw HTML is captured.
                results.Add(new Tuple <Playground, ScrapeData>(
                                GetPlayground(document, link),
                                new ScrapeData(link, document.DocumentNode.OuterHtml)));
            }

            return(results);
        }
Пример #11
0
        /// <summary>
        /// Posts the scrape data as JSON to "api/AddSalesCentralScrapeData" relative to
        /// <c>UnicorpURI</c>.
        /// </summary>
        /// <param name="data">Scrape data serialized into the request body.</param>
        /// <returns>
        /// The response body when the server answers with a success status code; otherwise
        /// an empty string.
        /// </returns>
        public static async Task <string> AddProductToUnicorpAsync(ScrapeData data)
        {
            using (var client = new HttpClient())
            {
                client.BaseAddress = new Uri(UnicorpURI);
                client.DefaultRequestHeaders.Accept.Clear();
                client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

                using (var content = new StringContent(JsonConvert.SerializeObject(data), Encoding.UTF8, "application/json"))
                // Await the request instead of blocking on .Result: blocking ties up a
                // thread and can deadlock under a synchronization context.
                using (var result = await client.PostAsync("api/AddSalesCentralScrapeData", content))
                {
                    if (result.IsSuccessStatusCode)
                    {
                        return(await result.Content.ReadAsStringAsync());
                    }
                }
            }
            return("");
        }
Пример #12
0
        /// <summary>
        /// Loads the configured page, selects the configured target nodes and builds a
        /// <c>ScrapeData</c> from each node that has both inner text and a non-empty value
        /// for the configured attribute.
        /// </summary>
        /// <returns>The scraped entries; empty when no node qualifies or none is selected.</returns>
        public List <ScrapeData> GetTheMorningDewData()
        {
            var url         = theMorningDewSettings.Url;
            var htmlDoc     = web.Load(url);
            var nodes       = htmlDoc.DocumentNode.SelectNodes(theMorningDewSettings.TargetNode);
            var scrapedData = new List <ScrapeData>();

            // NOTE(review): this looks like HtmlAgilityPack, whose SelectNodes returns null
            // (not an empty collection) when nothing matches - confirm. Guarding here keeps
            // the loop from failing on a null collection.
            if (nodes == null)
            {
                return(scrapedData);
            }

            foreach (var node in nodes)
            {
                if (String.IsNullOrEmpty(node.InnerText))
                {
                    // Skipped: node has no text. (Logging placeholder in the original.)
                    continue;
                }

                // Read the attribute once and reuse it for both the check and the entry.
                var attributeValue = node.GetAttributeValue(theMorningDewSettings.AttributeName, String.Empty);
                if (String.IsNullOrEmpty(attributeValue))
                {
                    // Skipped: attribute missing or empty. (Logging placeholder in the original.)
                    continue;
                }

                scrapedData.Add(new ScrapeData(node.InnerText, attributeValue, DateTime.Now));
            }

            return(scrapedData);
        }
Пример #13
0
        /// <summary>
        /// Stores the scraped reports in the Amazon database: first the distinct child
        /// ASINs and report dates, then one session, units-ordered, product-sales and
        /// total-order-items row per report.
        /// </summary>
        /// <param name="data">Scrape data whose <c>Reports</c> are stored.</param>
        /// <returns>
        /// A status message: "New Informations Added To Database" on success,
        /// "Already have informations till date. Not adding to database." when the newest
        /// scraped date is not later than the newest stored date, or one of the
        /// "Error ..." messages. Exceptions are not propagated; any exception yields
        /// "Error Connecting To Database".
        /// </returns>
        public static string AddProductToDB(ScrapeData data)
        {
            try
            {
                // Dispose the context on every exit path; it was previously never released.
                using (AmazonDBContext amazonDBContext = new AmazonDBContext())
                {
                    // NOTE(review): if this is EF Core's DatabaseFacade.EnsureCreated(), it
                    // returns false when the database already exists, so every run after
                    // the first would stop here - confirm the intended behavior.
                    if (!amazonDBContext.Database.EnsureCreated())
                    {
                        return("Error Creating Database");
                    }

                    if (!amazonDBContext.Database.CanConnect())
                    {
                        return("Error Storing Data");
                    }

                    var uniqueProductASINs =
                        data.Reports
                        .GroupBy(s => s.ChildASIN)
                        .Select(s => new UniqueProductASIN { ChildAsinID = s.Key })
                        .ToList();

                    var availableProductInfoOfDates =
                        data.Reports
                        .GroupBy(s => s.Date)
                        .Select(s => new AvailableProductInfoOfDate { DatePickerDate = s.Key })
                        .ToList();

                    if (amazonDBContext.AvailableProductInfoOfDates.Any())
                    {
                        // Only continue when the scrape holds a date newer than anything stored.
                        var productInfoOfDates            = amazonDBContext.AvailableProductInfoOfDates.AsQueryable();
                        var lastCollectionDateFromDB      = productInfoOfDates.OrderByDescending(s => s.DatePickerDate).FirstOrDefault();
                        var lastCollectionDateFromScraper = availableProductInfoOfDates.OrderByDescending(s => s.DatePickerDate).FirstOrDefault();
                        if (!(lastCollectionDateFromScraper.DatePickerDate > lastCollectionDateFromDB.DatePickerDate))
                        {
                            return("Already have informations till date. Not adding to database.");
                        }
                    }

                    foreach (var product in uniqueProductASINs)
                    {
                        amazonDBContext.UniqueProductASINs.Add(product);
                    }
                    foreach (var infoOfDate in availableProductInfoOfDates)
                    {
                        amazonDBContext.AvailableProductInfoOfDates.Add(infoOfDate);
                    }
                    // Saved before the detail rows are built: they look up the Ids of the
                    // ASIN and date rows through the context.
                    amazonDBContext.SaveChanges();

                    var childASINSessions =
                        data.Reports
                        .Select(x => new ChildASINSession
                        {
                            ChildASINId  = amazonDBContext.UniqueProductASINs.FirstOrDefault(s => s.ChildAsinID == x.ChildASIN).Id,
                            DateID       = amazonDBContext.AvailableProductInfoOfDates.FirstOrDefault(s => s.DatePickerDate == x.Date).Id,
                            SessionValue = x.Sessions
                        }).ToList();

                    var unitsOrderedByAsinId =
                        data.Reports
                        .Select(x => new UnitsOrderedByASINID
                        {
                            ChildASINId  = amazonDBContext.UniqueProductASINs.FirstOrDefault(s => s.ChildAsinID == x.ChildASIN).Id,
                            DateID       = amazonDBContext.AvailableProductInfoOfDates.FirstOrDefault(s => s.DatePickerDate == x.Date).Id,
                            UnitsOrdered = x.UnitsOrdered
                        }).ToList();

                    var productSalesByAsinId =
                        data.Reports
                        .Select(x => new ProductSalesByChildASINID
                        {
                            ChildASINId = amazonDBContext.UniqueProductASINs.FirstOrDefault(s => s.ChildAsinID == x.ChildASIN).Id,
                            DateID      = amazonDBContext.AvailableProductInfoOfDates.FirstOrDefault(s => s.DatePickerDate == x.Date).Id,
                            Earning     = x.ProductSales
                        }).ToList();

                    var totalOrderItemsByAsinId =
                        data.Reports
                        .Select(x => new TotalOrderItemsByASINID
                        {
                            ChildASINId = amazonDBContext.UniqueProductASINs.FirstOrDefault(s => s.ChildAsinID == x.ChildASIN).Id,
                            DateID      = amazonDBContext.AvailableProductInfoOfDates.FirstOrDefault(s => s.DatePickerDate == x.Date).Id,
                            // Total order items, not units ordered: the latter was a
                            // copy-paste of the units-ordered projection above.
                            TotalOrders = x.TotalOrderItems
                        }).ToList();

                    foreach (var session in childASINSessions)
                    {
                        amazonDBContext.ChildASINSessions.Add(session);
                    }
                    foreach (var unit in unitsOrderedByAsinId)
                    {
                        amazonDBContext.UnitsOrderedByASINIDs.Add(unit);
                    }
                    foreach (var sales in productSalesByAsinId)
                    {
                        amazonDBContext.ProductSalesByChildASINIDs.Add(sales);
                    }
                    foreach (var ordered in totalOrderItemsByAsinId)
                    {
                        amazonDBContext.TotalOrderItemsByASINIDs.Add(ordered);
                    }
                    amazonDBContext.SaveChanges();
                    return("New Informations Added To Database");
                }
            }
            catch
            {
                // Every failure is reported to the caller as a status string.
                return("Error Connecting To Database");
            }
        }
Пример #14
0
        /// <summary>
        /// Console entry point: reads credentials and the last scraped date from
        /// configuration, signs in through a Chrome driver (prompting for e-mail, password,
        /// captcha and OTP when needed), loads the per-day report JSON from the last scraped
        /// date up to now, records each day via <c>ScrapeDataTable</c>, flags the record file
        /// as complete, then stores the records with <c>AddProductToDB</c> and generates the
        /// Excel document. The process ends through <c>Environment.Exit(0)</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            #region Reading configurations from Json Files
            var options = new ChromeOptions();
            ChromeDriverService service = ChromeDriverService.CreateDefaultService();
            service.SuppressInitialDiagnosticInformation = true;
            IConfiguration configuration             = GetAppConfig();
            var            section                   = configuration.GetSection("UserAuth");
            var            emailFromConfig           = section.GetValue <string>("Email");
            var            passwordFromConfig        = section.GetValue <string>("Password");
            IConfiguration scrapeInfo                = GetScrapeInfo();
            string         lastScrapedDateTimeString = string.Empty;
            DateTime       lastScrapedDateTime       = DateTime.MinValue;
            // Resume from the recorded date; fall back to the GetDateTime("14", "August",
            // "2018") default when nothing is recorded or the value cannot be read.
            try
            {
                if (!string.IsNullOrEmpty(scrapeInfo.GetValue <string>("LastScrapedDatePickerTime")))
                {
                    lastScrapedDateTime = scrapeInfo.GetValue <DateTime>("LastScrapedDatePickerTime");
                }
                else
                {
                    lastScrapedDateTime = GetDateTime("14", "August", "2018");
                }
            }
            catch
            {
                lastScrapedDateTime = GetDateTime("14", "August", "2018");
            }
            // Configured credentials are used only when both values are present.
            if (!string.IsNullOrEmpty(emailFromConfig) && !string.IsNullOrEmpty(passwordFromConfig))
            {
                Email    = emailFromConfig;
                Password = passwordFromConfig;
            }
            UnicorpURI = configuration.GetValue <string>("UnicorpURI");
            #endregion

            // NOTE(review): every Environment.Exit call below ends the process from inside
            // this using block, so the driver's Dispose is not run on those paths.
            using (var driver = new ChromeDriver(service, options))
            {
                driver.Navigate().GoToUrl(baseUrl);
                // Three ways of locating the sign-in button are tried in turn: the long CSS
                // selector, the element id, then an absolute XPath.
                try
                {
                    var signInButton = driver.FindElementByCssSelector("#wp-content > div.as-body.desktop > div.border-color-squid-ink.flex-container.flex-align-items-stretch.flex-align-content-flex-start.flex-full-width.amsg-2018.fonts-loaded.border-color-squid-ink.design-Sell > div > div > div.background-color-aqua.border-color-mermaid.padding-left-xxlarge.padding-right-xxlarge.padding-top-xsmall.padding-bottom-xsmall.flex-container.flex-align-items-center.flex-align-content-flex-start.flex-full-width.amsg-2018.fonts-loaded.border-color-mermaid.design-Sell > div > div.border-color-squid-ink.flex-container.flex-align-items-center.flex-align-content-flex-start.amsg-2018.fonts-loaded.border-color-squid-ink.design-Sell > div:nth-child(1) > div.border-color-squid-ink.padding-right-xsmall.flex-container.flex-align-items-stretch.flex-align-content-flex-start.flex-full-width.amsg-2018.fonts-loaded.border-color-squid-ink.design-Sell > div > a > strong");
                    signInButton.Click();
                }
                catch (Exception e)
                {
                    try
                    {
                        driver.FindElementById("sign-in-button").Click();
                    }
                    catch
                    {
                        try
                        {
                            var signInButton = driver.FindElementByXPath("/html/body/div/div[1]/div/div/div[1]/div[1]/div/div/div[2]/div/div[2]/div[1]/div[1]/div/a/strong");
                            signInButton.Click();
                        }
                        catch
                        {
                            // All three lookups failed: report the first failure.
                            Console.WriteLine(e.Message);
                        }
                    }
                }

                if (string.IsNullOrEmpty(Email) || string.IsNullOrEmpty(Password))
                {
                    #region Email Input and Password Input Validation Check
                    bool emailEntered = false;
                    while (!emailEntered)
                    {
                        try
                        {
                            Console.WriteLine();
                            Console.WriteLine("Please enter your Email:");
                            Email        = Console.ReadLine();
                            emailEntered = true;
                        }
                        catch (ArgumentException ex)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine(ex.Message);
                            Console.ResetColor();
                        }
                    }
                    bool passwordEntered = false;
                    while (!passwordEntered)
                    {
                        try
                        {
                            Console.WriteLine();
                            // Restored: the prompt's closing parenthesis, the region opener
                            // and the unmaskedPass declaration were lost from this spot.
                            Console.WriteLine("Please enter your Password:");
                            #region Password Masking
                            string unmaskedPass = "";
                            do
                            {
                                ConsoleKeyInfo key = Console.ReadKey(true);
                                // Ordinary key: append it and echo a mask character.
                                // Backspace and Enter are handled in the else branch.
                                if (key.Key != ConsoleKey.Backspace && key.Key != ConsoleKey.Enter)
                                {
                                    unmaskedPass += key.KeyChar;
                                    Console.Write("*");
                                }
                                else
                                {
                                    if (key.Key == ConsoleKey.Backspace && unmaskedPass.Length > 0)
                                    {
                                        // Drop the last character and erase its mask.
                                        unmaskedPass = unmaskedPass.Substring(0, (unmaskedPass.Length - 1));
                                        Console.Write("\b \b");
                                    }
                                    else if (key.Key == ConsoleKey.Enter)
                                    {
                                        break;
                                    }
                                }
                            } while (true);
                            #endregion

                            Password        = unmaskedPass;
                            passwordEntered = true;
                        }
                        catch (ArgumentException ex)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine(ex.Message);
                            Console.ResetColor();
                        }
                    }
                    #endregion
                }


                driver.FindElementByName("email").SendKeys(Email);
                driver.FindElementByName("password").SendKeys(Password);
                driver.FindElementById("signInSubmit").Click();

                #region Captcha
                try
                {
                    var captchaBox = driver.FindElement(By.Id("auth-captcha-guess"));
                    if (captchaBox != null)
                    {
                        captchaRaised = true;
                        // Re-enter the password before submitting the captcha.
                        driver.FindElementByName("password").SendKeys(Password);
                        #region Captcha Validation Check
                        bool captchaEntered = false;
                        while (!captchaEntered)
                        {
                            try
                            {
                                Console.WriteLine();
                                Console.Write("Enter Captcha -- ");
                                Captcha        = Console.ReadLine();
                                captchaEntered = true;
                            }
                            catch (ArgumentException ex)
                            {
                                Console.ForegroundColor = ConsoleColor.Red;
                                Console.WriteLine(ex.Message);
                                Console.ResetColor();
                            }
                        }
                        #endregion
                        captchaBox.SendKeys(Captcha);
                        driver.FindElementByCssSelector("#a-autoid-0").Click();
                        captchaDone = true;
                    }
                }
                catch (Exception e)
                {
                    // Any failure in the captcha step is ignored.
                }
                #endregion

                #region OTP
                try
                {
                    driver.FindElementById("auth-mfa-form");

                    #region OTP Validation Check
                    bool otpEntered = false;
                    while (!otpEntered)
                    {
                        try
                        {
                            Console.WriteLine();
                            Console.Write("Enter OTP -- ");
                            OTP        = Console.ReadLine();
                            otpEntered = true;
                        }
                        catch (ArgumentException ex)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine(ex.Message);
                            Console.ResetColor();
                        }
                    }
                    #endregion

                    driver.FindElementById("auth-mfa-otpcode").SendKeys(OTP);
                    driver.FindElementById("auth-signin-button").Click();
                    otpDone = true;
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                }


                try
                {
                    var wrongOTPBox = driver.FindElement(By.Id("auth-error-message-box"));
                    if (wrongOTPBox != null)
                    {
                        Console.ForegroundColor = ConsoleColor.Green;
                        Console.WriteLine("Wrong OTP entered. Please close and restart the process ..");
                        Environment.Exit(0);
                    }
                }
                catch (Exception e)
                {
                    // Any failure while looking for the error box is ignored.
                }

                #endregion


                var scrapeToDate = DateTime.Now;


                // One report request per day, from the last scraped date up to now.
                while (lastScrapedDateTime < scrapeToDate)
                {
                    string date = lastScrapedDateTime.ToString("MM/dd/yyyy");

                    var url = "https://sellercentral.amazon.com/gp/site-metrics/load-report-JSON.html/ref=au_xx_cont_sitereport?sortColumn=12&filterFromDate=" + date + "&filterToDate=" + date + "&fromDate=" + date + "&toDate=" + date + "&cols=/c0/c1/c2/c3/c4/c5/c6/c7/c8/c9/c10/c11&reportID=102:DetailSalesTrafficByChildItem&sortIsAscending=0&currentPage=0&dateUnit=1&viewDateUnits=ALL&runDate=";

                    driver.Navigate().GoToUrl(url);


                    try
                    {
                        var json             = driver.FindElementById("sc-content-container").Text;
                        var deserializedJSON = JsonConvert.DeserializeObject <ReportJSON>(json);
                        var rows             = deserializedJSON.data.rows;
                        ScrapeDataTable(rows, lastScrapedDateTime);
                    }
                    // A day that fails to load, parse or store is skipped silently.
                    catch { }



                    lastScrapedDateTime = lastScrapedDateTime.AddDays(1);
                }

                FlagScrapeStatusToJson(true);

                #region Send scraped data to DB And Generate Excel
                try
                {
                    ScrapeData jdata = GetScrapeDataFromJSONRecordFile();

                    if (jdata != null)
                    {
                        if (jdata.AllScrapedTillDate)
                        {
                            Console.ForegroundColor = ConsoleColor.Green;
                            Console.WriteLine("Storing about " + jdata.Reports.Count + " Records, Please wait ..");
                            string result = AddProductToDB(jdata);
                            Console.WriteLine(jdata.Reports.Count + " Records stored to AmazonDB.db !");
                            Console.WriteLine(result);
                            Console.WriteLine();
                            Console.WriteLine("Generating Excel Document ...");
                            GenereateExcel();
                            Console.WriteLine("Done !");
                            Thread.Sleep(TimeSpan.FromSeconds(2));
                            Environment.Exit(0);
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Blue;
                            Console.WriteLine("Not All Records Scraped Till Date ! Exitting ...");
                            Console.ResetColor();
                            Thread.Sleep(TimeSpan.FromSeconds(5));
                            Environment.Exit(0);
                        }
                    }
                    else
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine("Invalid JSON Records. Exitting ...");
                        Console.ResetColor();
                        Thread.Sleep(TimeSpan.FromSeconds(5));
                        Environment.Exit(0);
                    }
                }
                catch (Exception e)
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("UnicorpLTD  is not live, Not sending the scraped informations.");
                    Console.ResetColor();
                    Thread.Sleep(TimeSpan.FromSeconds(5));
                    Environment.Exit(0);
                }



                #endregion
            }
        }
Пример #15
0
 /// <summary>
 /// Creates the service for the given link and scrape target.
 /// </summary>
 /// <param name="link">Value stored in <c>Link</c>.</param>
 /// <param name="scrapeDataFor">Value stored in <c>ScrapeDataFor</c>.</param>
 public ScrapeInfoService(string link, ScrapeData scrapeDataFor)
 {
     Link = link;
     ScrapeDataFor = scrapeDataFor;
 }