/// <summary>
/// Runs <paramref name="query"/> through the given module and discards the response.
/// </summary>
/// <param name="query">Value handed to the module's <c>Run</c> call.</param>
/// <param name="httpModule">Module that is executed with the query.</param>
/// <returns>Always <see cref="Stream.Null"/>; the object returned by the module is disposed, never read.</returns>
private Stream ExecuteRequest(string query, IRuntimeModule<string, Stream> httpModule)
{
    var response = httpModule.Run(query);
    try
    {
        // Nothing is read from the response; it only has to be released.
    }
    finally
    {
        if (response != null)
        {
            response.Dispose();
        }
    }

    return Stream.Null;
}
/// <summary>
/// Logs that a module is scheduled to restart once its cool-off period has elapsed.
/// </summary>
/// <param name="module">Module whose name appears in the log entry.</param>
/// <param name="elapsedTime">Time already elapsed; subtracted from <paramref name="coolOffPeriod"/> to get the remaining time.</param>
/// <param name="coolOffPeriod">Total cool-off period reported in the log entry.</param>
public static void ScheduledModule(IRuntimeModule module, TimeSpan elapsedTime, TimeSpan coolOffPeriod)
{
    TimeSpan remaining = coolOffPeriod - elapsedTime;
    string message = $"Module '{module.Name}' scheduled to restart after {coolOffPeriod.Humanize()} ({remaining.Humanize()} left).";

    Log.LogInformation((int)EventIds.ScheduledModule, message);
}
/// <summary>
/// Requests every configured result page for <paramref name="query"/> and resolves the details
/// of each returned item.
/// </summary>
/// <param name="query">Search text used to build a <c>YandexQuery</c> per page.</param>
/// <param name="httpModule">Module used for the HTTP requests; must not be null.</param>
/// <param name="antigateModule">Module passed on to the request execution; must not be null.</param>
/// <returns>The pages collected from all requested result pages, in request order.</returns>
/// <exception cref="NullReferenceException">Thrown when either module is null.</exception>
private List<SerpWebPage> Execute(string query, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule<AntigateTaskConfig, AntigateTaskResult> antigateModule)
{
    if (httpModule == null)
    {
        throw new NullReferenceException("Http module");
    }

    if (antigateModule == null)
    {
        throw new NullReferenceException("Antigate module");
    }

    // A missing page limit means a single page is requested.
    int pagesToRequest = Configuration.PageLimit ?? 1;
    var serpParser = new YandexHtmlSerpParser();
    var detailsParser = new HtmlTagPageParser();
    var collected = new List<SerpWebPage>();

    int pageIndex = 0;
    while (pageIndex < pagesToRequest)
    {
        var pageQuery = new YandexQuery(query, pageIndex, Configuration.Region);
        var pageItems = ExecuteRequest(pageQuery, serpParser, httpModule, antigateModule);
        collected.AddRange(LoadSerpItemDetails(detailsParser, httpModule, pageItems));
        pageIndex++;
    }

    return collected;
}
/// <summary>
/// Decides whether a module in back-off has waited long enough to be restarted.
/// </summary>
/// <param name="module">Module whose runtime status, restart count and last exit time are inspected.</param>
/// <returns>
/// <c>true</c> when the module is in back-off and the time since its last exit exceeds the cool-off
/// period; <c>false</c> for every other status or while the cool-off period is still running.
/// </returns>
/// <exception cref="ArgumentException">Thrown when the module's runtime status is Unknown.</exception>
bool ShouldRestart(IRuntimeModule module)
{
    var status = module.RuntimeStatus;

    // We don't really know what status "Unknown" means, so it is rejected outright.
    if (status == ModuleStatus.Unknown)
    {
        throw new ArgumentException("Module's runtime status is unknown which is not a valid status.");
    }

    if (status != ModuleStatus.Backoff)
    {
        return false;
    }

    // Compute how long we must wait before restarting this module.
    TimeSpan requiredWait = this.GetCoolOffPeriod(module.RestartCount);
    TimeSpan sinceLastExit = DateTime.UtcNow - module.LastExitTimeUtc;

    if (sinceLastExit > requiredWait)
    {
        return true;
    }

    // Still cooling off: report the pending restart.
    Events.ScheduledModule(module, sinceLastExit, requiredWait);
    return false;
}
/// <summary>
/// Sends a getTaskResult request for a previously created task and deserializes the response.
/// </summary>
/// <param name="httpModule">Module that executes the HTTP request.</param>
/// <param name="createTaskResponse">Response of the task creation; its <c>TaskId</c> identifies the task.</param>
/// <param name="resulType">Type passed to the JSON deserializer together with the response stream.</param>
/// <returns>The deserialized task result.</returns>
public AntigateTaskResult GetTaskResult(IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, AntigateCreateTaskResponse createTaskResponse, Type resulType)
{
    var resultRequest = new AntigateGetTaskResultRequest(Configuration.ClientKey, createTaskResponse.TaskId);

    // The request body is the JSON-serialized request object, flagged as a POST.
    var requestConfig = new HttpProviderRuntimeConfig
    {
        Query = "https://api.anti-captcha.com/getTaskResult",
        IsPost = true,
        Content = new StringContent(SerializeObjectToJson(resultRequest), Encoding.UTF8, "application/json")
    };

    using (var responseStream = httpModule.Run(requestConfig).GetContent())
    {
        return DeserializeJsonToObject<AntigateTaskResult>(resulType, responseStream);
    }
}
/// <summary>
/// Builds the Yandex XML search URL for <paramref name="request"/> and fetches it through the HTTP module.
/// </summary>
/// <param name="httpModule">Module that executes the HTTP request; must not be null.</param>
/// <param name="request">Search text placed into the <c>query</c> parameter.</param>
/// <param name="page">Optional page index; the <c>page</c> parameter is only added for values greater than zero.</param>
/// <returns>The content stream of the HTTP response.</returns>
/// <exception cref="NullReferenceException">Thrown when <paramref name="httpModule"/> is null.</exception>
/// <exception cref="UserException">Thrown when the HTTP module run is not successful.</exception>
private Stream FetchData(IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, string request, int? page)
{
    if (httpModule == null)
    {
        throw new NullReferenceException("Http module");
    }

    this.Context.LogInform(this.Context.Localizer[$"Request execution '{request}'"]);

    // Every value is percent-encoded so that characters such as '&', '=', '#', '+' or spaces
    // inside the search text or the credentials cannot corrupt the query string.
    StringBuilder queryBuilder = new StringBuilder();
    queryBuilder.Append("user=").Append(EscapeQueryValue(Configuration.User));
    queryBuilder.Append("&key=").Append(EscapeQueryValue(Configuration.Key));
    queryBuilder.Append("&query=").Append(EscapeQueryValue(request));

    if (!string.IsNullOrEmpty(Configuration.Region))
    {
        queryBuilder.Append("&lr=").Append(EscapeQueryValue(Configuration.Region));
    }

    if (!string.IsNullOrEmpty(Configuration.Filter))
    {
        queryBuilder.Append("&filter=").Append(EscapeQueryValue(Configuration.Filter));
    }

    if (page.HasValue && page.Value > 0)
    {
        queryBuilder.Append($"&page={page.Value}");
    }

    UriBuilder uri = new UriBuilder();
    uri.Scheme = "http";
    // Host and path are set separately; a path does not belong in the Host property.
    uri.Host = "yandex.ru";
    uri.Path = "search/xml";
    // No leading '?': .NET Framework always prepends one, which would produce "??user=...".
    uri.Query = queryBuilder.ToString();

    var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig() { Query = uri.ToString() });
    if (result.IsSuccessfully)
    {
        return result.Data.GetContent();
    }

    throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, httpModule.Name]);
}

/// <summary>
/// Percent-encodes a single query-string value; null is treated as an empty string.
/// </summary>
private static string EscapeQueryValue(string value)
{
    return Uri.EscapeDataString(value ?? string.Empty);
}
/// <summary>
/// Fetches the configured number of result pages for <paramref name="config"/> and parses each
/// response with the XML SERP parser.
/// </summary>
/// <param name="config">Search text forwarded to <c>FetchData</c>.</param>
/// <returns>All pages parsed from the fetched responses, in request order.</returns>
/// <exception cref="NullReferenceException">Thrown when the configured HTTP module cannot be found in the repository.</exception>
/// <exception cref="UserException">Thrown when parsing a response is not successful.</exception>
public List<SerpWebPage> Run(string config)
{
    // A missing page limit means a single request.
    int requestCount = Configuration.PageLimit ?? 1;
    var moduleRegistry = this.Context.Provider.GetService<IProjectModuleRegistry>();
    var serpParser = new XmlYandexSerpParser();

    IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;
    if (Configuration.HttpModuleId.HasValue)
    {
        var moduleRecord = Context.Repository.GetModule(Configuration.HttpModuleId.Value);
        if (moduleRecord == null)
        {
            throw new NullReferenceException("Http Module module");
        }

        httpModule = moduleRegistry.OpenModule(Context, moduleRecord.Code, moduleRecord.Id) as IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData>;
    }

    var collected = new List<SerpWebPage>();
    for (int pageIndex = 0; pageIndex < requestCount; pageIndex++)
    {
        using (var responseStream = FetchData(httpModule, config, pageIndex))
        {
            var parsed = RuntimeTask.Run(Context, serpParser, responseStream);
            if (!parsed.IsSuccessfully)
            {
                throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, serpParser.Name]);
            }

            collected.AddRange(parsed.Data);
        }
    }

    return collected;
}
/// <summary>
/// Downloads the resource at <paramref name="query"/> and returns its bytes as a Base64 string.
/// </summary>
/// <param name="httpModule">Module that executes the HTTP request; must not be null.</param>
/// <param name="query">Address of the resource to download.</param>
/// <returns>The Base64 encoding of the complete response content.</returns>
/// <exception cref="NullReferenceException">Thrown when <paramref name="httpModule"/> is null.</exception>
private string FethcCaptcha(IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, string query)
{
    if (httpModule == null)
    {
        throw new NullReferenceException("Http module");
    }

    var requestConfig = new HttpProviderRuntimeConfig { Query = query };

    using (var responseStream = httpModule.Run(requestConfig).GetContent())
    using (var buffer = new MemoryStream())
    {
        // Buffer the whole response so it can be encoded in one piece.
        responseStream.CopyTo(buffer);
        byte[] imageBytes = buffer.ToArray();
        return Convert.ToBase64String(imageBytes);
    }
}
/// <summary>
/// Converts parsed SERP items into <c>SerpWebPage</c> entries, resolving the final URL and title
/// of each item through <c>TryExtractDetaisl</c>.
/// </summary>
/// <param name="tagParser">Parser forwarded to the detail extraction.</param>
/// <param name="httpModule">HTTP module forwarded to the detail extraction.</param>
/// <param name="serpList">Items to convert.</param>
/// <returns>One page per item, in the order of <paramref name="serpList"/>.</returns>
private List<SerpWebPage> LoadSerpItemDetails(HtmlTagPageParser tagParser, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, List<YandexHtmlSerpItem> serpList)
{
    var pages = new List<SerpWebPage>();

    foreach (var item in serpList)
    {
        string resolvedUrl;
        string pageTitle;
        TryExtractDetaisl(tagParser, httpModule, item.Href, out resolvedUrl, out pageTitle);

        pages.Add(new SerpWebPage
        {
            Position = item.Postion,
            // Fall back to the item's own link when no redirect target was extracted.
            Url = string.IsNullOrEmpty(resolvedUrl) ? item.Href : resolvedUrl,
            Title = pageTitle
        });
    }

    return pages;
}
/// <summary>
/// Opens the configured HTTP and Antigate modules and executes the search for <paramref name="config"/>.
/// </summary>
/// <param name="config">Search text forwarded to <c>Execute</c>.</param>
/// <returns>The pages returned by <c>Execute</c>.</returns>
/// <exception cref="NullReferenceException">
/// Thrown when a configured module id cannot be resolved in the repository; the message names the
/// module that is missing.
/// </exception>
public List<SerpWebPage> Run(string config)
{
    var registry = this.Context.Provider.GetService<IProjectModuleRegistry>();

    IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;
    if (Configuration.HttpModuleId.HasValue)
    {
        var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);
        if (targetModule == null)
        {
            // This lookup is for the HTTP module, so the message names the HTTP module.
            throw new NullReferenceException("Http module");
        }

        httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData>;
    }

    IRuntimeModule<AntigateTaskConfig, AntigateTaskResult> antigateModule = null;
    if (Configuration.AntigateId.HasValue)
    {
        var targetModule = Context.Repository.GetModule(Configuration.AntigateId.Value);
        if (targetModule == null)
        {
            // This lookup is for the Antigate module, so the message names the Antigate module.
            throw new NullReferenceException("Antigate module");
        }

        antigateModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule<AntigateTaskConfig, AntigateTaskResult>;
    }

    return Execute(config, httpModule, antigateModule);
}
/// <summary>
/// Creates an Antigate task for <paramref name="config"/> and polls for its result until the
/// reported status is no longer "processing".
/// </summary>
/// <param name="config">Task description; also stored on the returned result's <c>Task</c> property.</param>
/// <returns>
/// The last polled task result, or a fresh empty result when the task could not be created
/// (no create-task response, or a response with a non-zero <c>ErrorId</c>).
/// </returns>
/// <exception cref="NullReferenceException">Thrown when the configured HTTP module cannot be found in the repository.</exception>
public async Task<AntigateTaskResult> LoadAsync(AntigateTaskConfig config)
{
    const int pollIntervalMilliseconds = 5000;

    AntigateTaskResult rsult = new AntigateTaskResult();
    var registry = this.Context.Provider.GetService<IProjectModuleRegistry>();

    IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;
    if (Configuration.HttpModuleId.HasValue)
    {
        var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);
        if (targetModule == null)
        {
            throw new NullReferenceException("Http module");
        }

        httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData>;
    }

    var createTaskResponse = CreatTask(config, httpModule);

    // CreatTask returns null when its HTTP call ends in an error; that case is treated like a
    // reported creation error instead of being dereferenced.
    if (createTaskResponse != null && createTaskResponse.ErrorId == 0)
    {
        do
        {
            // Wait asynchronously between polls so the calling thread is not blocked.
            await Task.Delay(pollIntervalMilliseconds);
            rsult = GetTaskResult(httpModule, createTaskResponse, config.ResultType);
        }
        while (rsult.Status == "processing");
    }

    rsult.Task = config;
    return rsult;
}
/// <summary>
/// Sends a createTask request for <paramref name="task"/> and deserializes the response.
/// </summary>
/// <param name="task">Task description embedded into the request object.</param>
/// <param name="httpModule">Module that executes the HTTP request.</param>
/// <returns>The deserialized response, or <c>null</c> when the module run reports an error result.</returns>
public AntigateCreateTaskResponse CreatTask(AntigateTaskConfig task, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule)
{
    var createRequest = new AntigateCreateTaskRequest(Configuration.ClientKey, Configuration.LanguagePool, Configuration.SoftId, Configuration.CallbackUrl)
    {
        Task = task
    };

    // The request body is the JSON-serialized request object, flagged as a POST.
    var requestConfig = new HttpProviderRuntimeConfig
    {
        Query = "https://api.anti-captcha.com/createTask",
        IsPost = true,
        Content = new StringContent(SerializeObjectToJson(createRequest), Encoding.UTF8, "application/json")
    };

    var runResult = RuntimeTask.Run(Context, httpModule, requestConfig);
    if (runResult.Details.Result == ActionExecutionResult.Error)
    {
        return null;
    }

    using (var responseStream = runResult.Data.GetContent())
    {
        return DeserializeJsonToObject<AntigateCreateTaskResponse>(responseStream);
    }
}
/// <summary>
/// Downloads <c>favicon.ico</c> from the root of each page's site and stores the bytes on the page.
/// </summary>
/// <remarks>
/// Loading is best effort: a failure for one page is logged and the remaining pages are still processed.
/// </remarks>
/// <param name="httpModule">Module that executes the HTTP requests; must not be null.</param>
/// <param name="pages">Pages whose <c>Favicon</c> property is filled in place.</param>
/// <exception cref="NullReferenceException">Thrown when <paramref name="httpModule"/> is null.</exception>
private void FillSerpPageDetails(IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, List<SerpWebPage> pages)
{
    if (httpModule == null)
    {
        throw new NullReferenceException("httpModule");
    }

    foreach (var page in pages)
    {
        try
        {
            var serpUrl = new Uri(page.Url);
            // Same scheme, host and port as the page, with the path replaced by the favicon file.
            var targetBuilder = new UriBuilder(serpUrl.Scheme, serpUrl.Host, serpUrl.Port, "favicon.ico");

            var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig() { Query = targetBuilder.Uri.ToString() });
            if (result.IsSuccessfully)
            {
                using (var stream = result.Data.GetContent())
                using (MemoryStream ms = new MemoryStream())
                {
                    stream.CopyTo(ms);
                    page.Favicon = ms.ToArray();
                }
            }
        }
        catch (Exception exc)
        {
            // A missing or unreachable favicon must not abort the run, but the failure is
            // recorded instead of being dropped silently.
            Context.LogInform(Context.Localizer[$"Favicon for '{page?.Url}' was not loaded: {exc.Message}"]);
        }
    }
}
/// <summary>
/// Loads the SERP pages for <paramref name="request"/> from the configured data provider, fills in
/// page details and runs the median frequency analysis on them.
/// </summary>
/// <param name="request">Request whose <c>Text</c> is sent to the data provider.</param>
/// <returns>The statistics produced by the analyser.</returns>
/// <exception cref="UserException">
/// Thrown when no data provider is configured, when the opened module is not a matching data provider,
/// when the data provider does not succeed, or when the analyser does not succeed.
/// </exception>
/// <exception cref="NullReferenceException">Thrown when the configured HTTP module cannot be found in the repository.</exception>
private LexModuleStats Analyze(Request request)
{
    if (Configuration.DataProvider == null)
    {
        throw new UserException("Data provider not specified");
    }

    var registry = this.Context.Provider.GetService<IProjectModuleRegistry>();
    var module = registry.OpenModule(Context, Configuration.DataProvider.Code, Configuration.DataProvider.Id);
    var dataProvider = module as IRuntimeModule<string, List<SerpWebPage>>;
    if (dataProvider == null)
    {
        throw new UserException("Data provider is incorrected");
    }

    IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;
    if (Configuration.HttpModuleId.HasValue)
    {
        var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);
        if (targetModule == null)
        {
            throw new NullReferenceException("Http Module module");
        }

        httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData>;
    }

    // NOTE(review): the loop runs up to RequestAttempt + 1 times — confirm whether RequestAttempt
    // is meant as the number of retries or the total number of attempts.
    ModuleTaskResult<List<SerpWebPage>> dataResult;
    var iteration = 0;
    do
    {
        iteration++;
        if (iteration > 1)
        {
            Context.LogInform(Context.Localizer[$"The attemp {iteration} to get position of '{request.Text}'"]);
        }

        dataResult = RuntimeTask.Run(this.Context, dataProvider, request.Text);
        if (dataResult.IsSuccessfully)
        {
            break;
        }
    }
    while (iteration <= Configuration.RequestAttempt);

    if (!dataResult.IsSuccessfully)
    {
        // The module that failed here is the data provider. httpModule may be null at this point
        // and was not the one that was run.
        throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
    }

    FillSerpPageDetails(httpModule, dataResult.Data);

    var analyser = new MedianFrequencyAnalyser();
    var result = RuntimeTask.Run(Context, analyser, new MedianFrequencyAnalyserConfig() { Pages = dataResult.Data, Request = request, HttpModule = httpModule });
    if (result.IsSuccessfully)
    {
        return result.Data;
    }

    throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, analyser.Name]);
}
/// <summary>
/// Logs that the restart statistics of a module are being cleared.
/// </summary>
/// <param name="module">Module whose name appears in the log entry.</param>
/// <param name="intensiveCareTime">Time span reported in the log entry as the healthy running time.</param>
public static void ClearingRestartStats(IRuntimeModule module, TimeSpan intensiveCareTime)
{
    string message = $"HealthRestartPlanner is clearing restart stats for module '{module.Name}' as it has been running healthy for {intensiveCareTime}.";

    Log.LogInformation((int)EventIds.ClearRestartStats, message);
}
/// <summary>
/// Creates a restart command for the given module.
/// </summary>
/// <param name="client">Docker client stored on the command; validated with <c>Preconditions.CheckNotNull</c>.</param>
/// <param name="module">Module stored on the command; validated with <c>Preconditions.CheckNotNull</c>.</param>
public RestartCommand(IDockerClient client, IRuntimeModule module)
{
    // Validate both arguments (client first) before touching any field.
    var checkedClient = Preconditions.CheckNotNull(client, nameof(client));
    var checkedModule = Preconditions.CheckNotNull(module, nameof(module));

    this.client = checkedClient;
    this.module = checkedModule;
}
/// <summary>
/// Downloads the captcha image and submits it to the Antigate module as an image-to-text task.
/// </summary>
/// <param name="captchaUrl">Address of the captcha image.</param>
/// <param name="httpModule">Module used to download the image.</param>
/// <param name="antigateModule">Module that runs the image-to-text task.</param>
/// <returns>The module's result cast to <c>AntigateTaskResult&lt;ImageToTextTaskSolution&gt;</c>.</returns>
private AntigateTaskResult<ImageToTextTaskSolution> ResolveCaptcha(string captchaUrl, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule<AntigateTaskConfig, AntigateTaskResult> antigateModule)
{
    // The task body is the Base64-encoded image.
    string encodedImage = FethcCaptcha(httpModule, captchaUrl);
    var recognitionTask = new ImageToTextTask { Body = encodedImage };

    var taskResult = antigateModule.Run(recognitionTask);
    return (AntigateTaskResult<ImageToTextTaskSolution>)taskResult;
}
/// <summary>
/// Executes <paramref name="query"/>; when a captcha is raised, resolves it and repeats the request
/// once as a captcha-check query.
/// </summary>
/// <param name="query">Query to execute.</param>
/// <param name="parser">Parser forwarded to the request execution.</param>
/// <param name="httpModule">HTTP module forwarded to the request execution and the captcha download.</param>
/// <param name="antigateModule">Module used to resolve a captcha.</param>
/// <returns>
/// The parsed items, or an empty list when the captcha resolution reports an error code.
/// </returns>
private List<YandexHtmlSerpItem> ExecuteRequest(YandexQuery query, YandexHtmlSerpParser parser, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule<AntigateTaskConfig, AntigateTaskResult> antigateModule)
{
    try
    {
        return ExecuteRequest(query.Generate(QueryGenerator), parser, httpModule, antigateModule);
    }
    catch (CaptchaException exc)
    {
        var solved = ResolveCaptcha(exc.ImageUrl, httpModule, antigateModule);
        if (!string.IsNullOrEmpty(solved.ErrorCode))
        {
            // The captcha could not be solved: give up with no items.
            return new List<YandexHtmlSerpItem>();
        }

        var captchaQuery = new YandexCheckCaptchaQuery(query.Query, query.Page, query.Region, exc.Key, solved.Solution.Text, exc.Retpath);
        return ExecuteRequest(captchaQuery, parser, httpModule, antigateModule);
    }
}
/// <summary>
/// Compares this module with another runtime module by delegating to the typed <c>Equals</c> overload.
/// </summary>
/// <param name="other">Module to compare with; passed on as null when it is not a <c>TestModuleBase&lt;TestConfig&gt;</c>.</param>
/// <returns>The result of the typed comparison.</returns>
public bool Equals(IRuntimeModule other)
{
    var typedOther = other as TestModuleBase<TestConfig>;
    return this.Equals(typedOther);
}
/// <summary>
/// Runs the data provider for <paramref name="request"/> and looks up the position of the page whose
/// URL contains <paramref name="targetPage"/>.
/// </summary>
/// <param name="request">Request whose <c>Text</c> is sent to the data provider.</param>
/// <param name="dataProvider">Module that returns the SERP pages for the request text.</param>
/// <param name="targetPage">Text searched for (lower-cased) inside each page URL.</param>
/// <returns>
/// The statistics for the request. <c>Request</c>, <c>Position</c> and <c>DetectionTime</c> are only
/// set when a matching page was found; otherwise the object is returned as constructed.
/// </returns>
/// <exception cref="UserException">Thrown when the data provider run is not successful.</exception>
private SerpModuleRequestStats FindRequestPosition(SerpModuleRequest request, IRuntimeModule<string, List<SerpWebPage>> dataProvider, string targetPage)
{
    var requestStats = new SerpModuleRequestStats();
    Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request..."]);

    var result = RuntimeTask.Run(this.Context, dataProvider, request.Text);
    if (!result.IsSuccessfully)
    {
        // Only a failed provider run is an error.
        throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
    }

    // NOTE(review): only the target is lower-cased, and SingleOrDefault throws when more than one
    // page matches — confirm that both are intended.
    var target = result.Data.Where(page => page.Url.Contains(targetPage.ToLower())).SingleOrDefault();
    if (target != null)
    {
        requestStats.Request = request;
        requestStats.Position = target.Position;
        requestStats.DetectionTime = DateTime.Now;
        Context.LogInform(Context.Localizer[$"Request '{request.Text}' found. Position : {target.Position}"]);
    }
    else
    {
        Context.LogInform(Context.Localizer[$"Request '{request.Text}' not found"]);
    }

    Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request was completed"]);

    // A successful analysis returns its statistics to the caller.
    return requestStats;
}
/// <summary>
/// Requests <paramref name="query"/> through the HTTP module and parses the response into SERP items.
/// </summary>
/// <param name="query">Address requested through the HTTP module.</param>
/// <param name="parser">Parser run on the response stream.</param>
/// <param name="httpModule">Module that executes the HTTP request.</param>
/// <param name="antigateModule">Not used by this overload.</param>
/// <returns>The parsed items, or an empty list when the HTTP request is not successful.</returns>
/// <remarks>When parsing is not successful, the parser result's <c>Error</c> is thrown.</remarks>
private List<YandexHtmlSerpItem> ExecuteRequest(string query, YandexHtmlSerpParser parser, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule<AntigateTaskConfig, AntigateTaskResult> antigateModule)
{
    var items = new List<YandexHtmlSerpItem>();

    var response = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig { Query = query });
    if (!response.IsSuccessfully)
    {
        return items;
    }

    using (var content = response.Data.GetContent())
    {
        var parsed = RuntimeTask.Run(Context, parser, content);
        if (!parsed.IsSuccessfully)
        {
            throw parsed.Error;
        }

        items.AddRange(parsed.Data);
    }

    return items;
}
/// <summary>
/// Assigns <paramref name="context"/> to the module and runs it inside a <c>RuntimeModuleTask</c>.
/// </summary>
/// <typeparam name="TConfig">Configuration type accepted by the module.</typeparam>
/// <typeparam name="TData">Data type produced by the module.</typeparam>
/// <param name="context">Runtime context set on the module and passed to the task.</param>
/// <param name="runtimeModule">Module to run.</param>
/// <param name="config">Configuration passed to the task.</param>
/// <returns>The result returned by the task's <c>Run</c> call.</returns>
public static ModuleTaskResult<TData> Run<TConfig, TData>(RuntimeContext context, IRuntimeModule<TConfig, TData> runtimeModule, TConfig config)
{
    // The module receives the context before the task is created.
    runtimeModule.Context = context;

    return new RuntimeModuleTask<TConfig, TData>(context, runtimeModule, config).Run();
}
/// <summary>
/// Loads <paramref name="url"/> and extracts its title and, if present, the target of a
/// <c>meta http-equiv='refresh'</c> tag. When a redirect target is found, that page is loaded as
/// well and its title replaces the first one.
/// </summary>
/// <param name="tagParser">Parser used to evaluate the tag selectors.</param>
/// <param name="httpModule">Module that executes the HTTP requests.</param>
/// <param name="url">Address of the page to inspect.</param>
/// <param name="redirectUrl">Receives the extracted redirect target, or an empty string.</param>
/// <param name="title">Receives the extracted title, or an empty string.</param>
private void TryExtractDetaisl(HtmlTagPageParser tagParser, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, string url, out string redirectUrl, out string title)
{
    redirectUrl = string.Empty;
    title = string.Empty;

    var pageResponse = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig { Query = url });
    if (!pageResponse.IsSuccessfully)
    {
        return;
    }

    var refreshExtractor = new HtmlTextAttributeParser
    {
        Attribute = "content",
        PostHandle = content =>
        {
            // Keep only what follows "URL=" and strip surrounding single quotes.
            string marker = "URL=";
            int markerIndex = content.IndexOf(marker);
            if (markerIndex == -1)
            {
                return content;
            }

            return content.Substring(markerIndex + marker.Length).Trim('\'');
        }
    };

    var selectors = new HtmlTagPageConfig();
    selectors.Tags.Add("redirecturl", new SinglHtmlNodeSelector
    {
        Tag = new TagProperties { Path = @"//meta[@http-equiv='refresh']", Extractor = refreshExtractor }
    });
    selectors.Tags.Add("title", new SinglHtmlNodeSelector
    {
        Tag = new TagProperties { Path = @"//title", Extractor = new HtmlTagInnerText() }
    });

    using (var pageStream = pageResponse.Data.GetContent())
    {
        selectors.Stream = pageStream;
        var firstPass = RuntimeTask.Run(Context, tagParser, selectors);
        if (firstPass.IsSuccessfully)
        {
            var extractedRedirect = firstPass.Data.Values["redirecturl"];
            if (extractedRedirect != null)
            {
                redirectUrl = (string)extractedRedirect;
            }

            var extractedTitle = firstPass.Data.Values["title"];
            if (extractedTitle != null)
            {
                title = (string)extractedTitle;
            }
        }
    }

    if (string.IsNullOrEmpty(redirectUrl))
    {
        return;
    }

    // Follow the redirect and prefer the title of the page it points to.
    pageResponse = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig { Query = redirectUrl });
    if (!pageResponse.IsSuccessfully)
    {
        return;
    }

    using (var redirectStream = pageResponse.Data.GetContent())
    {
        selectors.Stream = redirectStream;
        var secondPass = tagParser.Run(selectors);
        var redirectedTitle = secondPass.Values["title"];
        if (redirectedTitle != null)
        {
            title = (string)redirectedTitle;
        }
    }
}