/// <summary>
/// Runs <paramref name="dataProvider"/> for the text of <paramref name="request"/> and looks for the
/// single result page whose URL contains <paramref name="targetPage"/>.
/// </summary>
/// <param name="request">Request whose <c>Text</c> is handed to the data provider.</param>
/// <param name="dataProvider">Module that produces the list of result pages for a request text.</param>
/// <param name="targetPage">URL fragment to search for; it is lower-cased before the comparison.</param>
/// <returns>
/// The statistics object. <c>Request</c>, <c>Position</c> and <c>DetectionTime</c> are filled in only when a
/// matching page was found; otherwise the freshly constructed instance is returned unchanged.
/// </returns>
/// <exception cref="UserException">The data provider run did not complete successfully.</exception>
private SerpModuleRequestStats FindRequestPosition(SerpModuleRequest request, IRuntimeModule<string, List<SerpWebPage>> dataProvider, string targetPage)
{
    var requestStats = new SerpModuleRequestStats();
    Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request..."]);

    var result = RuntimeTask.Run(this.Context, dataProvider, request.Text);
    if (!result.IsSuccessfully)
    {
        throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
    }

    // SingleOrDefault throws InvalidOperationException when more than one page matches.
    var target = result.Data.SingleOrDefault(page => page.Url.Contains(targetPage.ToLower()));
    if (target != null)
    {
        requestStats.Request = request;
        requestStats.Position = target.Position;
        requestStats.DetectionTime = DateTime.Now;
        Context.LogInform(Context.Localizer[$"Request '{request.Text}' found. Position : {target.Position}"]);
    }
    else
    {
        Context.LogInform(Context.Localizer[$"Request '{request.Text}' not found"]);
    }

    Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request was completed"]);

    // Previously the success path fell through to the throw below the if-block,
    // so the collected statistics were never handed back to the caller.
    return requestStats;
}
/// <summary>
/// Builds the Yandex XML search URL for <paramref name="request"/> and executes it through
/// <paramref name="httpModule"/>.
/// </summary>
/// <param name="httpModule">HTTP module used to perform the request; must not be null.</param>
/// <param name="request">Search text placed into the <c>query</c> parameter.</param>
/// <param name="page">Optional page index; appended as <c>page</c> only when it has a value greater than zero.</param>
/// <returns>The content stream of the HTTP response.</returns>
/// <exception cref="NullReferenceException"><paramref name="httpModule"/> is null.</exception>
/// <exception cref="UserException">The HTTP module run did not complete successfully.</exception>
private Stream FetchData(IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, string request, int? page)
{
    if (httpModule == null)
    {
        // Exception type kept as-is so existing callers that catch it keep working.
        throw new NullReferenceException("Http module");
    }

    this.Context.LogInform(this.Context.Localizer[$"Request execution '{request}'"]);

    // Every value is percent-encoded so that characters such as '&', '#', '+' or spaces
    // in the request text cannot break or truncate the query string.
    // The $"{...}" wrapper turns a null value into an empty string before escaping.
    var queryBuilder = new StringBuilder();
    queryBuilder.Append("user=").Append(Uri.EscapeDataString($"{Configuration.User}"));
    queryBuilder.Append("&key=").Append(Uri.EscapeDataString($"{Configuration.Key}"));
    queryBuilder.Append("&query=").Append(Uri.EscapeDataString(request ?? string.Empty));

    if (!string.IsNullOrEmpty(Configuration.Region))
    {
        queryBuilder.Append("&lr=").Append(Uri.EscapeDataString(Configuration.Region));
    }

    if (!string.IsNullOrEmpty(Configuration.Filter))
    {
        queryBuilder.Append("&filter=").Append(Uri.EscapeDataString(Configuration.Filter));
    }

    if (page.HasValue && page.Value > 0)
    {
        queryBuilder.Append("&page=").Append(page.Value);
    }

    // Host and path are set separately (the path used to be smuggled into Host).
    // The query is assigned without a leading '?': UriBuilder adds it itself, and on
    // .NET Framework a supplied '?' would be doubled.
    var uri = new UriBuilder
    {
        Scheme = "http",
        Host = "yandex.ru",
        Path = "search/xml",
        Query = queryBuilder.ToString()
    };

    var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig() { Query = uri.ToString() });
    if (result.IsSuccessfully)
    {
        return result.Data.GetContent();
    }

    throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, httpModule.Name]);
}
/// <summary>
/// Fetches and parses search result pages for <paramref name="config"/>, one request per page index,
/// and returns all parsed pages in request order.
/// </summary>
/// <param name="config">Search text forwarded to <c>FetchData</c>.</param>
/// <returns>The pages collected from every request.</returns>
/// <exception cref="NullReferenceException">
/// <c>Configuration.HttpModuleId</c> has a value but the repository returns no module for it.
/// </exception>
/// <exception cref="UserException">The parser run for a fetched page did not complete successfully.</exception>
public List<SerpWebPage> Run(string config)
{
    var collected = new List<SerpWebPage>();

    // One request when no page limit is configured.
    int requestCount = Configuration.PageLimit ?? 1;

    var moduleRegistry = this.Context.Provider.GetService<IProjectModuleRegistry>();
    var serpParser = new XmlYandexSerpParser();

    IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;
    if (Configuration.HttpModuleId.HasValue)
    {
        var httpModuleInfo = Context.Repository.GetModule(Configuration.HttpModuleId.Value);
        if (httpModuleInfo == null)
        {
            throw new NullReferenceException("Http Module module");
        }

        httpModule = moduleRegistry.OpenModule(Context, httpModuleInfo.Code, httpModuleInfo.Id)
            as IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData>;
    }

    for (int pageIndex = 0; pageIndex < requestCount; pageIndex++)
    {
        using (var content = FetchData(httpModule, config, pageIndex))
        {
            var parsed = RuntimeTask.Run(Context, serpParser, content);
            if (!parsed.IsSuccessfully)
            {
                throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, serpParser.Name]);
            }

            collected.AddRange(parsed.Data);
        }
    }

    return collected;
}
/// <summary>
/// Downloads the first ten pages of <paramref name="config"/>, tokenizes each with an
/// <c>HtmlLexer</c>, runs a <c>LexicalTextAnalyzer</c> over the tokens and passes the collected
/// per-page statistics to <c>Analyze</c>.
/// </summary>
/// <param name="config">Supplies the pages to process and the HTTP module used to download them.</param>
/// <returns>The result of <c>Analyze</c> over the statistics of every page that was processed without error.</returns>
public LexModuleStats Run(MedianFrequencyAnalyserConfig config)
{
    var collectedStats = new List<LexicalTextAnalyzerStatistics>();
    var htmlLexer = new HtmlLexer();
    var textAnalyzer = new LexicalTextAnalyzer();

    foreach (var page in config.Pages.Take(10))
    {
        var download = RuntimeTask.Run(Context, config.HttpModule, new HttpProviderRuntimeConfig() { Query = page.Url.ToString() });
        if (!download.IsSuccessfully)
        {
            // Pages that cannot be downloaded are skipped.
            continue;
        }

        using (var content = download.Data.GetContent())
        {
            var tokens = RuntimeTask.Run(Context, htmlLexer, content);
            if (tokens.Details.Result == ActionExecutionResult.Error)
            {
                continue;
            }

            var analyzerConfig = new LexicalTextAnalyzerConfig()
            {
                Page = page,
                Tokens = tokens.Data,
                // Only tokens whose value has more than three elements are analysed.
                Filter = token => token.Value.Count() > 3
            };

            var analysis = RuntimeTask.Run(Context, textAnalyzer, analyzerConfig);
            if (analysis.Details.Result == ActionExecutionResult.Error)
            {
                continue;
            }

            collectedStats.Add(analysis.Data);
        }
    }

    return Analyze(collectedStats);
}
/// <summary>
/// Posts a JSON-serialized create-task request for <paramref name="task"/> to the anti-captcha
/// <c>createTask</c> endpoint through <paramref name="httpModule"/>.
/// </summary>
/// <param name="task">Task description placed into the request's <c>Task</c> property.</param>
/// <param name="httpModule">HTTP module used to send the POST request.</param>
/// <returns>
/// The response deserialized from the HTTP content, or <c>null</c> when the HTTP run reported
/// <c>ActionExecutionResult.Error</c>.
/// </returns>
public AntigateCreateTaskResponse CreatTask(AntigateTaskConfig task, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule)
{
    var payload = new AntigateCreateTaskRequest(Configuration.ClientKey, Configuration.LanguagePool, Configuration.SoftId, Configuration.CallbackUrl)
    {
        Task = task
    };

    var postConfig = new HttpProviderRuntimeConfig
    {
        Query = "https://api.anti-captcha.com/createTask",
        IsPost = true,
        Content = new StringContent(SerializeObjectToJson(payload), Encoding.UTF8, "application/json")
    };

    var response = RuntimeTask.Run(Context, httpModule, postConfig);
    if (response.Details.Result == ActionExecutionResult.Error)
    {
        return null;
    }

    using (var body = response.Data.GetContent())
    {
        return DeserializeJsonToObject<AntigateCreateTaskResponse>(body);
    }
}
/// <summary>
/// Tries to download <c>favicon.ico</c> from the scheme/host/port of every page URL and stores the
/// downloaded bytes in the page's <c>Favicon</c> property.
/// </summary>
/// <remarks>
/// The operation is best-effort: a failure for one page is logged and does not stop the processing
/// of the remaining pages.
/// </remarks>
/// <param name="httpModule">HTTP module used for the downloads; must not be null.</param>
/// <param name="pages">Pages to enrich.</param>
/// <exception cref="NullReferenceException"><paramref name="httpModule"/> is null.</exception>
private void FillSerpPageDetails(IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, List<SerpWebPage> pages)
{
    if (httpModule == null)
    {
        // Exception type kept as-is so existing callers that catch it keep working.
        throw new NullReferenceException("httpModule");
    }

    foreach (var page in pages)
    {
        try
        {
            var serpUrl = new Uri(page.Url);
            var faviconUrl = new UriBuilder(serpUrl.Scheme, serpUrl.Host, serpUrl.Port, "favicon.ico");

            var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig() { Query = faviconUrl.Uri.ToString() });
            if (!result.IsSuccessfully)
            {
                continue;
            }

            using (var stream = result.Data.GetContent())
            using (var buffer = new MemoryStream())
            {
                stream.CopyTo(buffer);
                page.Favicon = buffer.ToArray();
            }
        }
        catch (Exception exc)
        {
            // A missing or unreadable favicon must not abort the run, but the failure
            // is no longer swallowed silently.
            Context.LogInform(Context.Localizer[$"Failed to load favicon for '{page?.Url}': {exc.Message}"]);
        }
    }
}
/// <summary>
/// Requests <paramref name="query"/> through <paramref name="httpModule"/> and parses the response
/// with <paramref name="parser"/>.
/// </summary>
/// <param name="query">URL handed to the HTTP module.</param>
/// <param name="parser">Parser run against the response stream.</param>
/// <param name="httpModule">HTTP module used to perform the request.</param>
/// <param name="antigateModule">Not used by this method.</param>
/// <returns>
/// The parsed items, or an empty list when the HTTP run did not complete successfully.
/// </returns>
/// <remarks>When the parser run is not successful, its <c>Error</c> is thrown.</remarks>
private List<YandexHtmlSerpItem> ExecuteRequest(string query, YandexHtmlSerpParser parser, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule<AntigateTaskConfig, AntigateTaskResult> antigateModule)
{
    var items = new List<YandexHtmlSerpItem>();

    var response = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig() { Query = query });
    if (!response.IsSuccessfully)
    {
        return items;
    }

    using (var content = response.Data.GetContent())
    {
        var parsed = RuntimeTask.Run(Context, parser, content);
        if (!parsed.IsSuccessfully)
        {
            throw parsed.Error;
        }

        items.AddRange(parsed.Data);
    }

    return items;
}
/// <summary>
/// Loads the result pages for <paramref name="request"/> from the configured data provider,
/// fills in page details and runs a <c>MedianFrequencyAnalyser</c> over them.
/// </summary>
/// <param name="request">Request whose <c>Text</c> is passed to the data provider.</param>
/// <returns>The data produced by the analyser run.</returns>
/// <exception cref="UserException">
/// No data provider is configured, the opened module is not a data provider of the expected type,
/// the data provider failed on every attempt, or the analyser run failed.
/// </exception>
/// <exception cref="NullReferenceException">
/// <c>Configuration.HttpModuleId</c> has a value but the repository returns no module for it.
/// </exception>
private LexModuleStats Analyze(Request request)
{
    if (Configuration.DataProvider == null)
    {
        throw new UserException("Data provider not specified");
    }

    var registry = this.Context.Provider.GetService<IProjectModuleRegistry>();
    var module = registry.OpenModule(Context, Configuration.DataProvider.Code, Configuration.DataProvider.Id);
    var dataProvider = module as IRuntimeModule<string, List<SerpWebPage>>;
    if (dataProvider == null)
    {
        throw new UserException("Data provider is incorrected");
    }

    IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;
    if (Configuration.HttpModuleId.HasValue)
    {
        var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);
        if (targetModule == null)
        {
            throw new NullReferenceException("Http Module module");
        }

        httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData>;
    }

    // Query the data provider, repeating while the attempt counter has not exceeded
    // Configuration.RequestAttempt; the first successful run ends the loop.
    ModuleTaskResult<List<SerpWebPage>> dataResult;
    var iteration = 0;
    do
    {
        iteration++;
        if (iteration > 1)
        {
            Context.LogInform(Context.Localizer[$"The attemp {iteration} to get position of '{request.Text}'"]);
        }

        dataResult = RuntimeTask.Run(this.Context, dataProvider, request.Text);
        if (dataResult.IsSuccessfully)
        {
            break;
        }
    }
    while (iteration <= Configuration.RequestAttempt);

    if (!dataResult.IsSuccessfully)
    {
        // The data provider is the module that failed here. The message used to read
        // httpModule.Name, which named the wrong module and dereferenced null whenever
        // no HTTP module was configured.
        throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
    }

    FillSerpPageDetails(httpModule, dataResult.Data);

    var analyser = new MedianFrequencyAnalyser();
    var result = RuntimeTask.Run(Context, analyser, new MedianFrequencyAnalyserConfig() { Pages = dataResult.Data, Request = request, HttpModule = httpModule });
    if (result.IsSuccessfully)
    {
        return result.Data;
    }

    throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, analyser.Name]);
}
/// <summary>
/// Downloads <paramref name="url"/> and extracts the page title and the target of a
/// <c>meta http-equiv='refresh'</c> tag. When a redirect target is found, that page is downloaded
/// as well and its title, if present, replaces the first one.
/// </summary>
/// <param name="tagParser">Parser used to evaluate the tag selectors.</param>
/// <param name="httpModule">HTTP module used for both downloads.</param>
/// <param name="url">Address of the page to inspect.</param>
/// <param name="redirectUrl">Receives the redirect target, or an empty string when none was extracted.</param>
/// <param name="title">Receives the page title, or an empty string when none was extracted.</param>
private void TryExtractDetaisl(HtmlTagPageParser tagParser, IRuntimeModule<HttpProviderRuntimeConfig, HttpProviderData> httpModule, string url, out string redirectUrl, out string title)
{
    redirectUrl = string.Empty;
    title = string.Empty;

    var firstResponse = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig() { Query = url });
    if (!firstResponse.IsSuccessfully)
    {
        return;
    }

    // Reduces the "content" attribute of the refresh tag to the part after "URL=",
    // without surrounding single quotes; values without "URL=" pass through unchanged.
    var refreshTargetExtractor = new HtmlTextAttributeParser()
    {
        Attribute = "content",
        PostHandle = (content) =>
        {
            const string urlMarker = "URL=";
            int markerIndex = content.IndexOf(urlMarker);
            if (markerIndex == -1)
            {
                return content;
            }

            return content.Substring(markerIndex + urlMarker.Length).Trim('\'');
        }
    };

    var tagConfig = new HtmlTagPageConfig();
    tagConfig.Tags.Add("redirecturl", new SinglHtmlNodeSelector()
    {
        Tag = new TagProperties() { Path = @"//meta[@http-equiv='refresh']", Extractor = refreshTargetExtractor }
    });
    tagConfig.Tags.Add("title", new SinglHtmlNodeSelector()
    {
        Tag = new TagProperties() { Path = @"//title", Extractor = new HtmlTagInnerText() }
    });

    using (var content = firstResponse.Data.GetContent())
    {
        tagConfig.Stream = content;
        var parsed = RuntimeTask.Run(Context, tagParser, tagConfig);
        if (parsed.IsSuccessfully)
        {
            var redirectValue = parsed.Data.Values["redirecturl"];
            if (redirectValue != null)
            {
                redirectUrl = (string)redirectValue;
            }

            var titleValue = parsed.Data.Values["title"];
            if (titleValue != null)
            {
                title = (string)titleValue;
            }
        }
    }

    if (string.IsNullOrEmpty(redirectUrl))
    {
        return;
    }

    var redirectResponse = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig() { Query = redirectUrl });
    if (!redirectResponse.IsSuccessfully)
    {
        return;
    }

    using (var content = redirectResponse.Data.GetContent())
    {
        tagConfig.Stream = content;

        // The redirect target is parsed by calling the parser directly rather than through RuntimeTask.
        var redirectedValues = tagParser.Run(tagConfig);
        var redirectedTitle = redirectedValues.Values["title"];
        if (redirectedTitle != null)
        {
            title = (string)redirectedTitle;
        }
    }
}