Exemple #1
0
 /// <summary>
 /// Runs <paramref name="httpModule"/> with <paramref name="query"/> and disposes whatever it returns.
 /// </summary>
 /// <param name="query">Query handed to the module unchanged.</param>
 /// <param name="httpModule">Module that executes the query.</param>
 /// <returns>Always <see cref="Stream.Null"/>; the module's own result is not returned.</returns>
 private Stream ExecuteRequest(string query, IRuntimeModule <string, Stream> httpModule)
 {
     var response = httpModule.Run(query);
     try
     {
         // The response is not read here.
     }
     finally
     {
         if (response != null)
         {
             response.Dispose();
         }
     }

     return Stream.Null;
 }
Exemple #2
0
        /// <summary>
        /// Logs that a module is scheduled for a restart, stating the cool-off period and the time remaining.
        /// </summary>
        /// <param name="module">Module whose name is written to the log.</param>
        /// <param name="elapsedTime">Time that has already passed.</param>
        /// <param name="coolOffPeriod">Total cool-off period before the restart.</param>
        public static void ScheduledModule(IRuntimeModule module, TimeSpan elapsedTime, TimeSpan coolOffPeriod)
        {
            // Remaining wait = cool-off period minus the time already elapsed.
            TimeSpan remaining = coolOffPeriod - elapsedTime;
            string message     = $"Module '{module.Name}' scheduled to restart after {coolOffPeriod.Humanize()} ({remaining.Humanize()} left).";

            Log.LogInformation((int)EventIds.ScheduledModule, message);
        }
Exemple #3
0
        /// <summary>
        /// Loads SERP pages for <paramref name="query"/>, one request per page up to the configured page limit
        /// (a single page when no limit is configured), and resolves details for every item found.
        /// </summary>
        /// <param name="query">Search text.</param>
        /// <param name="httpModule">HTTP module used for all requests; must not be null.</param>
        /// <param name="antigateModule">Captcha-solving module; must not be null.</param>
        /// <returns>The collected pages in request order.</returns>
        /// <exception cref="NullReferenceException">Either module argument is null.</exception>
        private List <SerpWebPage> Execute(string query, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule <AntigateTaskConfig, AntigateTaskResult> antigateModule)
        {
            if (httpModule == null)
            {
                throw new NullReferenceException("Http module");
            }

            if (antigateModule == null)
            {
                throw new NullReferenceException("Antigate module");
            }

            var pages         = new List <SerpWebPage>();
            int pagesToLoad   = Configuration.PageLimit ?? 1;
            var serpParser    = new YandexHtmlSerpParser();
            var detailsParser = new HtmlTagPageParser();

            int pageIndex = 0;
            while (pageIndex < pagesToLoad)
            {
                var pageQuery = new YandexQuery(query, pageIndex, Configuration.Region);
                var serpItems = ExecuteRequest(pageQuery, serpParser, httpModule, antigateModule);

                pages.AddRange(LoadSerpItemDetails(detailsParser, httpModule, serpItems));
                pageIndex++;
            }

            return pages;
        }
Exemple #4
0
        /// <summary>
        /// Decides whether a module should be restarted now.
        /// </summary>
        /// <param name="module">Module whose runtime status, restart count and last exit time are inspected.</param>
        /// <returns>
        /// <c>true</c> only for a module in <c>Backoff</c> status whose time since last exit exceeds its
        /// cool-off period; <c>false</c> otherwise.
        /// </returns>
        /// <exception cref="ArgumentException">The module's runtime status is <c>Unknown</c>.</exception>
        bool ShouldRestart(IRuntimeModule module)
        {
            var status = module.RuntimeStatus;

            // "Unknown" carries no usable meaning, so it is rejected outright.
            if (status == ModuleStatus.Unknown)
            {
                throw new ArgumentException("Module's runtime status is unknown which is not a valid status.");
            }

            if (status != ModuleStatus.Backoff)
            {
                return false;
            }

            // How long the module has to wait versus how long it has already waited.
            TimeSpan requiredWait = this.GetCoolOffPeriod(module.RestartCount);
            TimeSpan waitedSoFar  = DateTime.UtcNow - module.LastExitTimeUtc;

            if (waitedSoFar > requiredWait)
            {
                return true;
            }

            Events.ScheduledModule(module, waitedSoFar, requiredWait);
            return false;
        }
Exemple #5
0
        /// <summary>
        /// Posts a getTaskResult request for a previously created task and deserializes the JSON response.
        /// </summary>
        /// <param name="httpModule">HTTP module that performs the POST.</param>
        /// <param name="createTaskResponse">Response of the create-task call; supplies the task id.</param>
        /// <param name="resulType">Type passed to the deserializer together with the response stream.</param>
        /// <returns>The deserialized task result.</returns>
        public AntigateTaskResult GetTaskResult(IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, AntigateCreateTaskResponse createTaskResponse, Type resulType)
        {
            var resultRequest = new AntigateGetTaskResultRequest(Configuration.ClientKey, createTaskResponse.TaskId);

            var httpConfig = new HttpProviderRuntimeConfig
            {
                Query   = "https://api.anti-captcha.com/getTaskResult",
                IsPost  = true,
                Content = new StringContent(SerializeObjectToJson(resultRequest), Encoding.UTF8, "application/json")
            };

            var response = httpModule.Run(httpConfig);

            using (var body = response.GetContent())
            {
                return DeserializeJsonToObject <AntigateTaskResult>(resulType, body);
            }
        }
        /// <summary>
        /// Requests one page of Yandex XML search results through the given HTTP module.
        /// </summary>
        /// <param name="httpModule">HTTP module that performs the request; must not be null.</param>
        /// <param name="request">Search text sent as the <c>query</c> parameter.</param>
        /// <param name="page">Page index; the <c>page</c> parameter is sent only for values greater than zero.</param>
        /// <returns>The content stream of the HTTP response.</returns>
        /// <exception cref="NullReferenceException"><paramref name="httpModule"/> is null.</exception>
        /// <exception cref="UserException">The HTTP module run was not successful.</exception>
        private Stream FetchData(IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, string request, int?page)
        {
            if (httpModule == null)
            {
                throw new NullReferenceException("Http module");
            }

            this.Context.LogInform(this.Context.Localizer[$"Request execution '{request}'"]);

            // Every value is percent-encoded so that spaces, '&', '=', '#' or non-ASCII characters in the
            // search text or the configuration cannot truncate or corrupt the query string.
            StringBuilder queryBuilder = new StringBuilder();

            queryBuilder.Append($"user={Uri.EscapeDataString(Configuration.User ?? string.Empty)}");
            queryBuilder.Append($"&key={Uri.EscapeDataString(Configuration.Key ?? string.Empty)}");
            queryBuilder.Append($"&query={Uri.EscapeDataString(request ?? string.Empty)}");

            if (!string.IsNullOrEmpty(Configuration.Region))
            {
                queryBuilder.Append($"&lr={Uri.EscapeDataString(Configuration.Region)}");
            }

            if (!string.IsNullOrEmpty(Configuration.Filter))
            {
                queryBuilder.Append($"&filter={Uri.EscapeDataString(Configuration.Filter)}");
            }

            if (page.HasValue && page.Value > 0)
            {
                queryBuilder.Append($"&page={page.Value}");
            }

            UriBuilder uri = new UriBuilder();

            uri.Scheme = "http";
            // Host and path are set separately; the path does not belong in the Host component.
            uri.Host   = "yandex.ru";
            uri.Path   = "/search/xml";
            // No leading '?': UriBuilder.Query supplies the separator itself, and on .NET Framework
            // a value that already starts with '?' produces "??".
            uri.Query  = queryBuilder.ToString();

            var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig()
            {
                Query = uri.ToString()
            });

            if (result.IsSuccessfully)
            {
                return(result.Data.GetContent());
            }

            throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, httpModule.Name]);
        }
        /// <summary>
        /// Fetches and parses Yandex XML result pages for the given search text, one request per page up to
        /// the configured page limit (a single page when no limit is configured).
        /// </summary>
        /// <param name="config">Search text forwarded to <c>FetchData</c>.</param>
        /// <returns>All pages parsed from the fetched responses, in request order.</returns>
        /// <exception cref="NullReferenceException">The configured HTTP module id does not resolve to a module.</exception>
        /// <exception cref="UserException">The XML parser run was not successful.</exception>
        public List <SerpWebPage> Run(string config)
        {
            int requestCount   = Configuration.PageLimit ?? 1;
            var moduleRegistry = this.Context.Provider.GetService <IProjectModuleRegistry>();
            var xmlParser      = new XmlYandexSerpParser();

            // Stays null when no HTTP module id is configured.
            IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;

            if (Configuration.HttpModuleId.HasValue)
            {
                var moduleInfo = Context.Repository.GetModule(Configuration.HttpModuleId.Value);

                if (moduleInfo == null)
                {
                    throw new NullReferenceException("Http Module module");
                }

                httpModule = moduleRegistry.OpenModule(Context, moduleInfo.Code, moduleInfo.Id) as IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData>;
            }

            var collected = new List <SerpWebPage>();

            for (int pageIndex = 0; pageIndex < requestCount; pageIndex++)
            {
                using (var response = FetchData(httpModule, config, pageIndex))
                {
                    var parsed = RuntimeTask.Run(Context, xmlParser, response);

                    if (!parsed.IsSuccessfully)
                    {
                        throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, xmlParser.Name]);
                    }

                    collected.AddRange(parsed.Data);
                }
            }

            return collected;
        }
Exemple #8
0
        /// <summary>
        /// Downloads the resource at <paramref name="query"/> through the HTTP module and returns its bytes
        /// encoded as Base64.
        /// </summary>
        /// <param name="httpModule">HTTP module that performs the request; must not be null.</param>
        /// <param name="query">Address of the resource to download.</param>
        /// <returns>Base64 text of the downloaded content.</returns>
        /// <exception cref="NullReferenceException"><paramref name="httpModule"/> is null.</exception>
        private string FethcCaptcha(IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, string query)
        {
            if (httpModule == null)
            {
                throw new NullReferenceException("Http module");
            }

            var downloadConfig = new HttpProviderRuntimeConfig { Query = query };

            using (var content = httpModule.Run(downloadConfig).GetContent())
            using (var buffer = new MemoryStream())
            {
                // Buffer the whole response so it can be encoded in one go.
                content.CopyTo(buffer);
                byte[] raw = buffer.ToArray();

                return Convert.ToBase64String(raw);
            }
        }
Exemple #9
0
        /// <summary>
        /// Converts SERP items into <c>SerpWebPage</c> entries, asking <c>TryExtractDetaisl</c> for the
        /// redirect target and title of each item.
        /// </summary>
        /// <param name="tagParser">Parser forwarded to <c>TryExtractDetaisl</c>.</param>
        /// <param name="httpModule">HTTP module forwarded to <c>TryExtractDetaisl</c>.</param>
        /// <param name="serpList">Items to convert.</param>
        /// <returns>One page per item, in the order of <paramref name="serpList"/>.</returns>
        private List <SerpWebPage> LoadSerpItemDetails(HtmlTagPageParser tagParser, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, List <YandexHtmlSerpItem> serpList)
        {
            var pages = new List <SerpWebPage>();

            foreach (var item in serpList)
            {
                string resolvedUrl;
                string pageTitle;

                TryExtractDetaisl(tagParser, httpModule, item.Href, out resolvedUrl, out pageTitle);

                pages.Add(new SerpWebPage
                {
                    Position = item.Postion,
                    // Fall back to the item's own link when no redirect target was extracted.
                    Url      = string.IsNullOrEmpty(resolvedUrl) ? item.Href : resolvedUrl,
                    Title    = pageTitle
                });
            }

            return pages;
        }
Exemple #10
0
        /// <summary>
        /// Resolves the configured HTTP and Antigate modules and runs the search through <c>Execute</c>.
        /// </summary>
        /// <param name="config">Search text forwarded to <c>Execute</c>.</param>
        /// <returns>The pages returned by <c>Execute</c>.</returns>
        /// <exception cref="NullReferenceException">
        /// A configured module id does not resolve to a module; the message names the module that was missing.
        /// </exception>
        public List <SerpWebPage> Run(string config)
        {
            var registry = this.Context.Provider.GetService <IProjectModuleRegistry>();

            // Each module stays null when its id is not configured.
            IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;

            if (Configuration.HttpModuleId.HasValue)
            {
                var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);

                if (targetModule == null)
                {
                    // The message names the HTTP module, since this is the HTTP lookup.
                    throw new NullReferenceException("Http module");
                }

                httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData>;
            }

            IRuntimeModule <AntigateTaskConfig, AntigateTaskResult> antigateModule = null;

            if (Configuration.AntigateId.HasValue)
            {
                var targetModule = Context.Repository.GetModule(Configuration.AntigateId.Value);

                if (targetModule == null)
                {
                    // The message names the Antigate module, since this is the Antigate lookup.
                    throw new NullReferenceException("Antigate module");
                }

                antigateModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule <AntigateTaskConfig, AntigateTaskResult>;
            }

            return Execute(config, httpModule, antigateModule);
        }
Exemple #11
0
        /// <summary>
        /// Creates an Antigate task and polls for its result until the status is no longer "processing".
        /// </summary>
        /// <param name="config">Task to submit; it is also attached to the returned result.</param>
        /// <returns>
        /// The last polled task result, or an empty result when task creation reported a non-zero error id.
        /// In both cases <c>Task</c> is set to <paramref name="config"/>.
        /// </returns>
        /// <exception cref="NullReferenceException">The configured HTTP module id does not resolve to a module.</exception>
        public async Task <AntigateTaskResult> LoadAsync(AntigateTaskConfig config)
        {
            const int PollIntervalMilliseconds = 5000;

            AntigateTaskResult rsult = new AntigateTaskResult();

            var registry = this.Context.Provider.GetService <IProjectModuleRegistry>();

            IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;

            if (Configuration.HttpModuleId.HasValue)
            {
                var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);

                if (targetModule == null)
                {
                    throw new NullReferenceException("Http module");
                }

                httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData>;
            }

            // NOTE(review): CreatTask can return null when the HTTP call fails; that case is not handled here.
            var createTaskResponse = CreatTask(config, httpModule);

            if (createTaskResponse.ErrorId == 0)
            {
                do
                {
                    // Wait asynchronously instead of blocking the thread. ConfigureAwait(false) keeps
                    // callers that block on the returned task from deadlocking on a synchronization context.
                    await Task.Delay(PollIntervalMilliseconds).ConfigureAwait(false);
                    rsult = GetTaskResult(httpModule, createTaskResponse, config.ResultType);
                } while (rsult.Status == "processing");
            }

            rsult.Task = config;

            return rsult;
        }
Exemple #12
0
        /// <summary>
        /// Posts a createTask request for <paramref name="task"/> and deserializes the JSON response.
        /// </summary>
        /// <param name="task">Task description embedded in the request.</param>
        /// <param name="httpModule">HTTP module that performs the POST.</param>
        /// <returns>The deserialized response, or <c>null</c> when the HTTP run ended with an error result.</returns>
        public AntigateCreateTaskResponse CreatTask(AntigateTaskConfig task, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule)
        {
            var createRequest = new AntigateCreateTaskRequest(Configuration.ClientKey, Configuration.LanguagePool, Configuration.SoftId, Configuration.CallbackUrl)
            {
                Task = task
            };

            var httpConfig = new HttpProviderRuntimeConfig
            {
                Query   = "https://api.anti-captcha.com/createTask",
                IsPost  = true,
                Content = new StringContent(SerializeObjectToJson(createRequest), Encoding.UTF8, "application/json")
            };

            var response = RuntimeTask.Run(Context, httpModule, httpConfig);

            if (response.Details.Result == ActionExecutionResult.Error)
            {
                return null;
            }

            using (var body = response.Data.GetContent())
            {
                return DeserializeJsonToObject <AntigateCreateTaskResponse>(body);
            }
        }
Exemple #13
0
        /// <summary>
        /// Downloads <c>favicon.ico</c> from the host of every page and stores the bytes in the page's
        /// <c>Favicon</c>. A failure for one page is logged and does not stop the remaining pages.
        /// </summary>
        /// <param name="httpModule">HTTP module that performs the downloads; must not be null.</param>
        /// <param name="pages">Pages to update in place.</param>
        /// <exception cref="NullReferenceException"><paramref name="httpModule"/> is null.</exception>
        private void FillSerpPageDetails(IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, List <SerpWebPage> pages)
        {
            if (httpModule == null)
            {
                throw new NullReferenceException("httpModule");
            }

            foreach (var page in pages)
            {
                try
                {
                    // Build <scheme>://<host>:<port>/favicon.ico from the page address.
                    var serpUrl       = new Uri(page.Url);
                    var targetBuilder = new UriBuilder(serpUrl.Scheme, serpUrl.Host, serpUrl.Port, "favicon.ico");

                    var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig()
                    {
                        Query = targetBuilder.Uri.ToString()
                    });

                    if (result.IsSuccessfully)
                    {
                        using (var strean = result.Data.GetContent())
                        {
                            using (MemoryStream ms = new MemoryStream())
                            {
                                strean.CopyTo(ms);
                                page.Favicon = ms.ToArray();
                            }
                        }
                    }
                }
                catch (Exception exc)
                {
                    // The favicon is optional: keep processing the other pages, but record the failure
                    // instead of discarding it silently.
                    Context.LogInform(Context.Localizer[$"Favicon of '{page.Url}' was not loaded: {exc.Message}"]);
                }
            }
        }
Exemple #14
0
        /// <summary>
        /// Loads SERP pages for <paramref name="request"/> from the configured data provider (with retries),
        /// fills in page favicons and runs the median-frequency analysis over the pages.
        /// </summary>
        /// <param name="request">Request whose text is sent to the data provider.</param>
        /// <returns>The statistics produced by the analyser.</returns>
        /// <exception cref="UserException">
        /// No data provider is configured, the opened module is not a compatible data provider, the data
        /// provider failed on every attempt, or the analyser run was not successful.
        /// </exception>
        /// <exception cref="NullReferenceException">The configured HTTP module id does not resolve to a module.</exception>
        private LexModuleStats Analyze(Request request)
        {
            if (Configuration.DataProvider == null)
            {
                throw new UserException("Data provider not specified");
            }
            var registry = this.Context.Provider.GetService <IProjectModuleRegistry>();

            var module = registry.OpenModule(Context, Configuration.DataProvider.Code, Configuration.DataProvider.Id);

            var dataProvider = module as IRuntimeModule <string, List <SerpWebPage> >;

            if (dataProvider == null)
            {
                throw new UserException("Data provider is incorrected");
            }

            // Stays null when no HTTP module id is configured.
            IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;

            if (Configuration.HttpModuleId.HasValue)
            {
                var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);

                if (targetModule == null)
                {
                    throw new NullReferenceException("Http Module module");
                }

                httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData>;
            }

            ModuleTaskResult <List <SerpWebPage> > dataResult;

            var iteration = 0;

            // The first attempt always runs; the loop then repeats while the attempt counter has not
            // exceeded Configuration.RequestAttempt, i.e. up to RequestAttempt + 1 attempts in total.
            do
            {
                iteration++;
                if (iteration > 1)
                {
                    Context.LogInform(Context.Localizer[$"The attemp {iteration} to get position of '{request.Text}'"]);
                }

                dataResult = RuntimeTask.Run(this.Context, dataProvider, request.Text);

                if (dataResult.IsSuccessfully)
                {
                    break;
                }
            } while (iteration <= Configuration.RequestAttempt);

            if (!dataResult.IsSuccessfully)
            {
                // The module that failed here is the data provider. Naming httpModule was misleading and
                // dereferenced a possibly-null reference.
                throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
            }

            FillSerpPageDetails(httpModule, dataResult.Data);

            var analyser = new MedianFrequencyAnalyser();

            var result = RuntimeTask.Run(Context, analyser, new MedianFrequencyAnalyserConfig()
            {
                Pages = dataResult.Data, Request = request, HttpModule = httpModule
            });

            if (result.IsSuccessfully)
            {
                return(result.Data);
            }

            throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, analyser.Name]);
        }
Exemple #15
0
 /// <summary>
 /// Logs that the restart statistics of a module are being cleared.
 /// </summary>
 /// <param name="module">Module whose name is written to the log.</param>
 /// <param name="intensiveCareTime">Duration written to the log as the healthy running time.</param>
 public static void ClearingRestartStats(IRuntimeModule module, TimeSpan intensiveCareTime)
 {
     string message = $"HealthRestartPlanner is clearing restart stats for module '{module.Name}' as it has been running healthy for {intensiveCareTime}.";

     Log.LogInformation((int)EventIds.ClearRestartStats, message);
 }
Exemple #16
0
 /// <summary>
 /// Creates a restart command for <paramref name="module"/> that uses <paramref name="client"/>.
 /// Both arguments are passed through <c>Preconditions.CheckNotNull</c>, client first.
 /// </summary>
 /// <param name="client">Docker client stored by the command.</param>
 /// <param name="module">Module stored by the command.</param>
 public RestartCommand(IDockerClient client, IRuntimeModule module)
 {
     var checkedClient = Preconditions.CheckNotNull(client, nameof(client));
     var checkedModule = Preconditions.CheckNotNull(module, nameof(module));

     this.client = checkedClient;
     this.module = checkedModule;
 }
Exemple #17
0
        /// <summary>
        /// Downloads the captcha image, submits it to the Antigate module as an image-to-text task and
        /// returns the module's result.
        /// </summary>
        /// <param name="captchaUrl">Address of the captcha image.</param>
        /// <param name="httpModule">HTTP module used to download the image.</param>
        /// <param name="antigateModule">Module that runs the image-to-text task.</param>
        /// <returns>The module result cast to an image-to-text task result.</returns>
        private AntigateTaskResult <ImageToTextTaskSolution> ResolveCaptcha(string captchaUrl, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule <AntigateTaskConfig, AntigateTaskResult> antigateModule)
        {
            // The image is sent as Base64 text in the task body.
            string encodedImage = FethcCaptcha(httpModule, captchaUrl);

            var solved = antigateModule.Run(new ImageToTextTask { Body = encodedImage });

            return (AntigateTaskResult <ImageToTextTaskSolution>)solved;
        }
Exemple #18
0
        /// <summary>
        /// Executes a Yandex query and, when a captcha is demanded, solves it and repeats the request once
        /// with the captcha answer.
        /// </summary>
        /// <param name="query">Query to execute.</param>
        /// <param name="parser">Parser for the result page.</param>
        /// <param name="httpModule">HTTP module used for the requests.</param>
        /// <param name="antigateModule">Module used to solve a captcha.</param>
        /// <returns>
        /// The parsed items, or an empty list when the captcha result carries an error code.
        /// </returns>
        private List <YandexHtmlSerpItem> ExecuteRequest(YandexQuery query, YandexHtmlSerpParser parser, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule <AntigateTaskConfig, AntigateTaskResult> antigateModule)
        {
            try
            {
                return ExecuteRequest(query.Generate(QueryGenerator), parser, httpModule, antigateModule);
            }
            catch (CaptchaException captcha)
            {
                var solved = ResolveCaptcha(captcha.ImageUrl, httpModule, antigateModule);

                if (!string.IsNullOrEmpty(solved.ErrorCode))
                {
                    // The captcha could not be solved: report no items.
                    return new List <YandexHtmlSerpItem>();
                }

                var answerQuery = new YandexCheckCaptchaQuery(query.Query, query.Page, query.Region, captcha.Key, solved.Solution.Text, captcha.Retpath);

                return ExecuteRequest(answerQuery, parser, httpModule, antigateModule);
            }
        }
Exemple #19
0
 /// <summary>
 /// Compares this module with <paramref name="other"/> by delegating to the
 /// <c>TestModuleBase&lt;TestConfig&gt;</c> overload; an argument of any other type is passed on as null.
 /// </summary>
 public bool Equals(IRuntimeModule other)
 {
     var typedOther = other as TestModuleBase <TestConfig>;

     return this.Equals(typedOther);
 }
Exemple #20
0
        /// <summary>
        /// Runs the data provider for <paramref name="request"/> and looks for the page whose URL contains
        /// <paramref name="targetPage"/> (lower-cased).
        /// </summary>
        /// <param name="request">Request whose text is sent to the data provider.</param>
        /// <param name="dataProvider">Module that returns the result pages.</param>
        /// <param name="targetPage">Text searched for in the page URLs.</param>
        /// <returns>
        /// Statistics with request, position and detection time filled in when a matching page was found;
        /// otherwise a statistics object with none of them set.
        /// </returns>
        /// <exception cref="UserException">The data provider run was not successful.</exception>
        /// <exception cref="InvalidOperationException">More than one page matches (thrown by <c>SingleOrDefault</c>).</exception>
        private SerpModuleRequestStats FindRequestPosition(SerpModuleRequest request, IRuntimeModule <string, List <SerpWebPage> > dataProvider, string targetPage)
        {
            var requestStats = new SerpModuleRequestStats();

            Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request..."]);

            var result = RuntimeTask.Run(this.Context, dataProvider, request.Text);

            if (result.IsSuccessfully)
            {
                var target = result.Data.Where(page => page.Url.Contains(targetPage.ToLower())).SingleOrDefault();
                if (target != null)
                {
                    requestStats.Request       = request;
                    requestStats.Position      = target.Position;
                    requestStats.DetectionTime = DateTime.Now;

                    Context.LogInform(Context.Localizer[$"Request '{request.Text}' found. Position : {target.Position}"]);
                }
                else
                {
                    Context.LogInform(Context.Localizer[$"Request '{request.Text}' not found"]);
                }

                Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request was completed"]);

                // Return on success; without this the method fell through to the throw below
                // and could never deliver a result.
                return requestStats;
            }

            throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
        }
Exemple #21
0
        /// <summary>
        /// Requests <paramref name="query"/> through the HTTP module and parses the response into SERP items.
        /// </summary>
        /// <param name="query">Address to request.</param>
        /// <param name="parser">Parser run over the response stream.</param>
        /// <param name="httpModule">HTTP module that performs the request.</param>
        /// <param name="antigateModule">Not used by this overload.</param>
        /// <returns>The parsed items, or an empty list when the HTTP run was not successful.</returns>
        /// <remarks>When the parser run fails, the error carried by the parser result is thrown.</remarks>
        private List <YandexHtmlSerpItem> ExecuteRequest(string query, YandexHtmlSerpParser parser, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule <AntigateTaskConfig, AntigateTaskResult> antigateModule)
        {
            var items = new List <YandexHtmlSerpItem>();

            var response = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig { Query = query });

            if (!response.IsSuccessfully)
            {
                return items;
            }

            using (var content = response.Data.GetContent())
            {
                var parsed = RuntimeTask.Run(Context, parser, content);

                if (parsed.IsSuccessfully)
                {
                    items.AddRange(parsed.Data);
                    return items;
                }

                throw parsed.Error;
            }
        }
Exemple #22
0
        /// <summary>
        /// Assigns <paramref name="context"/> to the module, wraps the module and its configuration in a
        /// <c>RuntimeModuleTask</c> and runs that task.
        /// </summary>
        /// <typeparam name="TConfig">Configuration type accepted by the module.</typeparam>
        /// <typeparam name="TData">Data type carried by the task result.</typeparam>
        /// <param name="context">Runtime context assigned to the module and passed to the task.</param>
        /// <param name="runtimeModule">Module to run.</param>
        /// <param name="config">Configuration for this run.</param>
        /// <returns>The result returned by the task.</returns>
        public static ModuleTaskResult <TData> Run <TConfig, TData>(RuntimeContext context, IRuntimeModule <TConfig, TData> runtimeModule, TConfig config)
        {
            // The module receives the context before the task is created.
            runtimeModule.Context = context;

            return new RuntimeModuleTask <TConfig, TData>(context, runtimeModule, config).Run();
        }
Exemple #23
0
        /// <summary>
        /// Loads <paramref name="url"/> and extracts the meta-refresh redirect target and the page title.
        /// When a redirect target is found, the target page is loaded as well and its title, if present,
        /// replaces the first one.
        /// </summary>
        /// <param name="tagParser">Parser used to evaluate the tag configuration.</param>
        /// <param name="httpModule">HTTP module used for both requests.</param>
        /// <param name="url">Address of the page to inspect.</param>
        /// <param name="redirectUrl">Receives the redirect target, or an empty string when none was extracted.</param>
        /// <param name="title">Receives the page title, or an empty string when none was extracted.</param>
        private void TryExtractDetaisl(HtmlTagPageParser tagParser, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, string url, out string redirectUrl, out string title)
        {
            redirectUrl = string.Empty;
            title       = string.Empty;

            var pageResponse = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig { Query = url });

            if (!pageResponse.IsSuccessfully)
            {
                return;
            }

            // Reads the "content" attribute and keeps only what follows "URL=", without surrounding quotes.
            var refreshExtractor = new HtmlTextAttributeParser
            {
                Attribute  = "content",
                PostHandle = content =>
                {
                    const string marker = "URL=";
                    int markerIndex     = content.IndexOf(marker);

                    if (markerIndex == -1)
                    {
                        return content;
                    }

                    return content.Substring(markerIndex + marker.Length).Trim('\'');
                }
            };

            var tagConfig = new HtmlTagPageConfig();

            tagConfig.Tags.Add("redirecturl", new SinglHtmlNodeSelector
            {
                Tag = new TagProperties { Path = @"//meta[@http-equiv='refresh']", Extractor = refreshExtractor }
            });
            tagConfig.Tags.Add("title", new SinglHtmlNodeSelector
            {
                Tag = new TagProperties { Path = @"//title", Extractor = new HtmlTagInnerText() }
            });

            using (var pageContent = pageResponse.Data.GetContent())
            {
                tagConfig.Stream = pageContent;

                var parsed = RuntimeTask.Run(Context, tagParser, tagConfig);

                if (parsed.IsSuccessfully)
                {
                    var values = parsed.Data.Values;

                    if (values["redirecturl"] != null)
                    {
                        redirectUrl = (string)values["redirecturl"];
                    }

                    if (values["title"] != null)
                    {
                        title = (string)values["title"];
                    }
                }
            }

            if (string.IsNullOrEmpty(redirectUrl))
            {
                return;
            }

            var redirectResponse = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig { Query = redirectUrl });

            if (!redirectResponse.IsSuccessfully)
            {
                return;
            }

            using (var redirectContent = redirectResponse.Data.GetContent())
            {
                // The same tag configuration is reused with the new stream; the parser is invoked directly here.
                tagConfig.Stream = redirectContent;

                var redirectValues = tagParser.Run(tagConfig).Values;

                if (redirectValues["title"] != null)
                {
                    title = (string)redirectValues["title"];
                }
            }
        }