Esempio n. 1
0
        /// <summary>
        /// Opens <c>ScraperConfigurations.StoresJsonUrl</c> in a script-enabled AngleSharp browsing context and,
        /// when the page text matches <c>RegexCall</c>, evaluates <c>toHex(slowAES.decrypt(...))</c> in the page
        /// to compute the value of the anti-scraping cookie.
        /// </summary>
        /// <returns>
        /// A cookie for path "/" and domain "www.tuenvio.cu", or <c>null</c> when the page reports
        /// "Server Error 406", is blank, does not match <c>RegexCall</c>, the script never yields a value
        /// within the retry budget, or any exception is raised (exceptions are logged, not rethrown).
        /// </returns>
        private async Task <Cookie> ReadAntiScrappingCookieAsync()
        {
            // 600 attempts x 100 ms = 60 s, the same budget as the HTTP timeout used below.
            // The original loop had no upper bound and could hang the caller forever.
            const int MaxEvaluationAttempts              = 600;
            const int EvaluationRetryDelayInMilliseconds = 100;

            Cookie antiScrappingCookie = null;

            Log.Information("Initializing Anti-Scrapping Cookie.");
            try
            {
                // The client is only needed while this method runs; dispose it on exit so each call
                // does not leave a handler (and its sockets) behind.
                using var httpClient = new HttpClient {
                    Timeout = TimeSpan.FromSeconds(60)
                };
                httpClient.DefaultRequestHeaders.CacheControl = new CacheControlHeaderValue {
                    NoCache = true
                };

                httpClient.DefaultRequestHeaders.TryAddWithoutValidation(
                    "user-agent",
                    ScraperConfigurations.GetSupportedAgent());

                var requester = new HttpClientRequester(httpClient);
                var config    = Configuration.Default.WithRequester(requester)
                                .WithDefaultLoader(new LoaderOptions {
                    IsResourceLoadingEnabled = true
                }).WithJs();

                var context  = BrowsingContext.New(config);
                var document = await context.OpenAsync(ScraperConfigurations.StoresJsonUrl).WaitUntilAvailable();

                var content = document.Body.TextContent;
                var match   = Regex.Match(content, @"Server\sError\s+406");
                if (!match.Success && !string.IsNullOrWhiteSpace(content))
                {
                    var parametersMatch = this.RegexCall.Match(content);
                    if (parametersMatch.Success)
                    {
                        var cookieName = parametersMatch.Groups[1].Value.Trim();

                        var toNumbersACall = this.RegexA.Match(content).Groups[1].Value;
                        var toNumbersBCall = this.RegexB.Match(content).Groups[1].Value;
                        var toNumbersCCall = this.RegexC.Match(content).Groups[1].Value;

                        // Two-phase substitution: the names a/b/c are first turned into placeholders and
                        // only then replaced by the captured expressions - presumably so that letters
                        // inside an inserted expression are not mistaken for a later parameter name.
                        var parameters = parametersMatch.Groups[2].Value;
                        parameters = parameters.Replace("a", "%A%").Replace("b", "%B%").Replace("c", "%C%");
                        parameters = parameters.Replace("%A%", toNumbersACall).Replace("%B%", toNumbersBCall)
                                     .Replace("%C%", toNumbersCCall);

                        var script = $"toHex(slowAES.decrypt({parameters}))";

                        // Review: looks like the WaitUntilAvailable method is not working properly,
                        // so the script is retried until the page's scripts are ready.
                        var cookieValue = string.Empty;
                        for (var attempt = 1;
                             attempt <= MaxEvaluationAttempts && string.IsNullOrWhiteSpace(cookieValue);
                             attempt++)
                        {
                            try
                            {
                                // A null script result is treated as "not ready yet" instead of
                                // relying on a NullReferenceException.
                                cookieValue = document.ExecuteScript(script)?.ToString();
                            }
                            catch (Exception e)
                            {
                                Log.Warning(e, "Error retrieving the Anti-Scrapping cookie.");
                            }

                            // Wait after every unsuccessful attempt (not only after an exception) so a
                            // blank result does not turn the loop into a busy spin.
                            if (string.IsNullOrWhiteSpace(cookieValue))
                            {
                                await Task.Delay(EvaluationRetryDelayInMilliseconds);
                            }
                        }

                        if (string.IsNullOrWhiteSpace(cookieValue))
                        {
                            Log.Warning(
                                "Giving up on the Anti-Scrapping cookie after {Attempts} attempts.",
                                MaxEvaluationAttempts);
                        }
                        else
                        {
                            Log.Information(
                                "Read cookie '{CookieName}' with value '{CookieValue}'.",
                                cookieName,
                                cookieValue);

                            antiScrappingCookie = new Cookie(cookieName, cookieValue, "/", "www.tuenvio.cu");
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Log.Warning(e, "Error evaluating the Anti-Scrapping cookie.");
            }

            return antiScrappingCookie;
        }
Esempio n. 2
0
        /// <summary>
        /// This method gets called by the runtime. Use this method to add services to the container.
        /// For more information on how to configure your application, visit
        /// https://go.microsoft.com/fwlink/?LinkID=398940
        /// </summary>
        /// <param name="services">The service collection that receives the registrations.</param>
        public void ConfigureServices(IServiceCollection services)
        {
            services.AddControllersWithViews();
            services.AddRazorPages();

            services.AddSignalR();

            // TODO: Change the ServiceLifetime. The usage of ServiceLifetime.Transient is because multiple threads operations are running in the same dbcontext.
            services.AddDbContext <DbContext, ApplicationDbContext>(ServiceLifetime.Transient);
            services.AddOrcEntityFrameworkCore();
            services.AddDatabaseSeeder <ApplicationDbSeeder>();

            var token = this.Configuration.GetSection("TelegramBot")?["Token"];

            if (!string.IsNullOrWhiteSpace(token))
            {
                if (token == "%TELEGRAM_BOT_TOKEN%")
                {
                    // The placeholder shipped in the configuration file was never replaced.
                    Log.Warning(
                        "Telegram notification is disable. Replace %TELEGRAM_BOT_TOKEN% placeholder in the configuration file with a valid bot token.");
                }
                else
                {
                    Log.Information("Telegram notification is enable.");

                    services.AddTransient <ITelegramBotClient>(
                        sp => new TelegramBotClient(token));
                    services.AddSingleton <ITelegramCommander, TelegramCommander>();
                }
            }
            else
            {
                Log.Warning(
                    "Telegram notification is disable. To enable it, add a TelegramBot section with a key Token.");
            }

            HttpClientExtensions.Configure(this.Configuration);

            services.AddTransient(sp => new CookieContainer());

            services.AddTransient(sp => BrowsingContext.New(AngleSharp.Configuration.Default));

            // Every resolution builds a fresh HttpClient with its own handler and cookie container.
            services.AddTransient(
                sp =>
            {
                var cookieContainer = sp.GetService <CookieContainer>();

                var handler = new HttpClientHandler
                {
                    AutomaticDecompression =
                        DecompressionMethods.GZip | DecompressionMethods.Deflate
                        | DecompressionMethods.Brotli,
                    AllowAutoRedirect = true
                };

                if (cookieContainer != null)
                {
                    handler.CookieContainer = cookieContainer;
                }

                var httpClient = new HttpClient(handler)
                {
                    Timeout = ResolveHttpClientTimeout()
                };

                httpClient.DefaultRequestHeaders.TryAddWithoutValidation(
                    "user-agent",
                    ScraperConfigurations.GetSupportedAgent());

                httpClient.DefaultRequestHeaders.TryAddWithoutValidation(
                    "accept-encoding",
                    "gzip, deflate, br");
                httpClient.DefaultRequestHeaders.CacheControl = new CacheControlHeaderValue {
                    NoCache = true
                };

                return httpClient;
            });

            services.AddScoped <IStoreService, StoreService>();

            services.AddSingleton <ICacheStorage <string, Product> >(
                provider => new CacheStorage <string, Product>(storeNullValues: true));
            services.AddSingleton <ICacheStorage <string, Department> >(
                provider => new CacheStorage <string, Department>(storeNullValues: true));
            services.AddSingleton <ICacheStorage <string, Store> >(
                provider => new CacheStorage <string, Store>(storeNullValues: true));

            services.AddSingleton <ICookiesAwareHttpClientFactory, CookiesAwareHttpClientFactory>();
            services.AddSingleton <IOfficialStoreInfoService, OfficialStoreInfoService>();

            services.AddTransient <IEntityScraper <Product>, ProductScraper>();
            services.AddTransient <IEntityScraper <Department>, DepartmentScraper>();
            services.AddTransient <IEntityScraper <Store>, StoreScraper>();

            services.AddTransient <IMultiEntityScraper <Product>, InspectDepartmentProductsScraper>();
            services.AddTransient <IMultiEntityScraper <Department>, InspectStoreDepartmentsScraper>();

            services.AddSingleton <ImportStoresHostedService>();

            services.AddHostedService <AuthenticationHostedService>();
            services.AddHostedService <DepartmentMonitorHostedService>();
            // services.AddHostedService<ProductMonitorHostedService>();
            services.AddHostedService <StoreMonitorHostedService>();
            services.AddHostedService <CookieSerializationHostedService>();
            // services.AddHostedService<SyncUsersFromTelegramHostedService>();

            // Reads Http:TimeoutInSeconds and falls back to ScraperConfigurations.HttpClientTimeout when the
            // value is missing, unparsable or outside the range HttpClient.Timeout accepts.
            TimeSpan ResolveHttpClientTimeout()
            {
                // HttpClient.Timeout rejects values that are not positive or that exceed
                // int.MaxValue milliseconds; compare as double so the bound is not rounded up.
                const double MaxTimeoutInSeconds = int.MaxValue / 1000d;

                var httpTimeoutInSeconds = this.Configuration.GetSection("Http")?["TimeoutInSeconds"];

                // Configuration values are machine-readable: parse with the invariant culture so "2.5"
                // means the same thing regardless of the host's regional settings.
                var parsed = float.TryParse(
                    httpTimeoutInSeconds,
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var timeoutInSeconds);

                // NaN fails the "> 0" test and infinity fails the upper bound, so both fall back too.
                if (parsed && timeoutInSeconds > 0 && timeoutInSeconds <= MaxTimeoutInSeconds)
                {
                    return TimeSpan.FromSeconds(timeoutInSeconds);
                }

                return ScraperConfigurations.HttpClientTimeout;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Tries, up to five times, to sign in to the store behind <paramref name="url"/>: downloads the
        /// sign-in page, builds the form parameters, downloads and solves the captcha, and posts the form.
        /// An attempt counts as authenticated when the cookie container holds a non-blank "ShopMSAuth"
        /// cookie afterwards.
        /// </summary>
        /// <param name="antiScrappingCookie">Optional cookie added to every attempt and to the result.</param>
        /// <param name="url">Store URL; its "/Products?depPid=0" part is replaced to build the sign-in and captcha URLs.</param>
        /// <param name="username">User name passed to <c>BuildSignInParametersAsync</c>.</param>
        /// <param name="password">Password passed to <c>BuildSignInParametersAsync</c>.</param>
        /// <param name="unattended">
        /// When <c>true</c> the captcha is solved with <c>GetCaptchaText</c>; otherwise the image is copied to
        /// "captchas/{storeSlug}.jpg" and the method waits for a solution in "captchas/{storeSlug}.txt".
        /// </param>
        /// <returns>
        /// The non-blank cookies read from the container after the last posted attempt, keyed by name, plus
        /// <paramref name="antiScrappingCookie"/> when given. Cookies are returned whether or not the
        /// sign-in succeeded.
        /// </returns>
        private async Task <Dictionary <string, Cookie> > LoginAsync(
            Cookie antiScrappingCookie,
            string url,
            string username,
            string password,
            bool unattended)
        {
            // TODO: Improve this.
            var storeSlug                    = UriHelper.GetStoreSlug(url);
            var storeCaptchaFilePath         = $"captchas/{storeSlug}.jpg";
            var storeCaptchaSolutionFilePath = $"captchas/{storeSlug}.txt";

            Log.Information("Authenticating in TuEnvio as {username}", username);

            var signInUrl         = url.Replace("/Products?depPid=0", "/signin.aspx");
            var defaultCaptchaUrl = url.Replace("/Products?depPid=0", "/captcha.ashx");

            var cookieContainer = new CookieContainer();

            string captchaFilePath = null;
            var    captchaText     = string.Empty;

            var isAuthenticated = false;
            var attempts        = 0;
            CookieCollection httpHandlerCookieCollection = null;

            do
            {
                attempts++;

                if (antiScrappingCookie != null)
                {
                    cookieContainer.Add(ScraperConfigurations.CookieCollectionUrl, antiScrappingCookie);
                }

                var httpMessageHandler = new HttpClientHandler {
                    CookieContainer = cookieContainer
                };

                // Disposed at the end of every attempt (the client also disposes its handler); the shared
                // cookie container is independent of the handler and survives across attempts.
                using var httpClient = new HttpClient(httpMessageHandler)
                {
                    Timeout = ScraperConfigurations.HttpClientTimeout
                };
                httpClient.DefaultRequestHeaders.CacheControl = new CacheControlHeaderValue {
                    NoCache = true
                };
                httpClient.DefaultRequestHeaders.TryAddWithoutValidation(
                    "user-agent",
                    ScraperConfigurations.GetSupportedAgent());

                var       browsingContext    = BrowsingContext.New(Configuration.Default.WithJs());
                IDocument signinPageDocument = null;
                try
                {
                    var httpResponseMessage = await httpClient.GetCaptchaSaveAsync(signInUrl);

                    if (httpResponseMessage?.Content != null)
                    {
                        var signinPageContent = await httpResponseMessage.Content.ReadAsStringAsync();

                        signinPageDocument = await browsingContext.OpenAsync(req => req.Content(signinPageContent));
                    }
                }
                catch (Exception e)
                {
                    Log.Warning(e, "Error retrieving sign in page with url '{Url}'", signInUrl);
                }

                Dictionary <string, string> signInParameters = null;
                try
                {
                    signInParameters = await BuildSignInParametersAsync(username, password, signinPageDocument);
                }
                catch (Exception e)
                {
                    Log.Warning(e, "Error building sign in parameters for '{Url}'", signInUrl);
                }

                if (signInParameters != null)
                {
                    // The page may have failed to load; do not assume a document just because the
                    // parameters could be built.
                    var captchaImg    = signinPageDocument?.QuerySelector <IElement>("#cphPage_Login_captch");
                    var captchaImgSrc = captchaImg?.Attributes["src"]?.Value;

                    // Always derive the URL from the untouched default: re-using the URL of the previous
                    // attempt would apply the replacement to an already substituted value.
                    var captchaUrl = defaultCaptchaUrl;
                    if (!string.IsNullOrWhiteSpace(captchaImgSrc))
                    {
                        captchaUrl = defaultCaptchaUrl.Replace("captcha.ashx", captchaImgSrc);
                    }

                    captchaFilePath = await DownloadCaptchaAsync(httpClient, captchaUrl);

                    if (!string.IsNullOrWhiteSpace(captchaFilePath) && File.Exists(captchaFilePath))
                    {
                        if (unattended)
                        {
                            captchaText = GetCaptchaText(captchaFilePath);
                        }
                        else
                        {
                            // Publish the image and poll until a solution file shows up or the image is
                            // removed.
                            File.Delete(storeCaptchaSolutionFilePath);
                            File.Copy(captchaFilePath, storeCaptchaFilePath, true);
                            while (File.Exists(storeCaptchaFilePath) && !File.Exists(storeCaptchaSolutionFilePath))
                            {
                                await Task.Delay(1000);
                            }

                            // The loop also ends when the image disappears without a solution; treat that
                            // as "no answer" instead of failing on a missing file. Trim so a trailing
                            // newline in the solution file is not submitted as part of the captcha.
                            captchaText = File.Exists(storeCaptchaSolutionFilePath)
                                              ? (await File.ReadAllTextAsync(storeCaptchaSolutionFilePath)).Trim()
                                              : string.Empty;

                            File.Delete(storeCaptchaFilePath);
                            File.Delete(storeCaptchaSolutionFilePath);
                        }

                        if (!string.IsNullOrWhiteSpace(captchaText))
                        {
                            // Indexer instead of Add: must not throw if the key is already present.
                            signInParameters["ctl00$cphPage$Login$capcha"] = captchaText;
                            try
                            {
                                // Only the cookies set by the response matter; release it right away.
                                using var signInResponse = await httpClient.PostAsync(
                                                               signInUrl,
                                                               new FormUrlEncodedContent(signInParameters));

                                httpHandlerCookieCollection =
                                    cookieContainer.GetCookies(ScraperConfigurations.CookieCollectionUrl);
                                isAuthenticated =
                                    !string.IsNullOrWhiteSpace(httpHandlerCookieCollection["ShopMSAuth"]?.Value);
                            }
                            catch (Exception e)
                            {
                                Log.Warning(e, "Error authenticating in '{Url}'", signInUrl);
                            }
                        }
                    }

                    try
                    {
                        // Nothing to clean up when the captcha download produced no file path.
                        if (!isAuthenticated && !string.IsNullOrWhiteSpace(captchaFilePath))
                        {
                            File.Delete(captchaFilePath);
                        }
                    }
                    catch (Exception e)
                    {
                        Log.Warning(e, "Error deleting captcha file '{FilePath}'", captchaFilePath);
                    }
                }
            }while (attempts < 5 && !isAuthenticated);

            if (isAuthenticated)
            {
                try
                {
                    // Keep the solved captcha image, named after the accepted text.
                    File.Move(captchaFilePath, $"captchas/{captchaText}.jpg", true);
                }
                catch (Exception e)
                {
                    Log.Warning(e, "Error moving captcha file {FilePath}", captchaFilePath);
                }
            }

            var cookiesCollection = new Dictionary <string, Cookie>();

            if (httpHandlerCookieCollection != null)
            {
                foreach (Cookie cookie in httpHandlerCookieCollection)
                {
                    if (!string.IsNullOrWhiteSpace(cookie.Value))
                    {
                        cookiesCollection[cookie.Name] = cookie;
                    }
                }
            }

            if (antiScrappingCookie != null)
            {
                cookiesCollection[antiScrappingCookie.Name] = antiScrappingCookie;
            }

            return cookiesCollection;
        }