예제 #1
0
        /// <summary>
        /// Runs the data provider for the request text and looks for the result page
        /// whose URL contains the (lower-cased) target page.
        /// </summary>
        /// <param name="request">Request whose <c>Text</c> is passed to the data provider.</param>
        /// <param name="dataProvider">Runtime module that produces the list of result pages for a request text.</param>
        /// <param name="targetPage">Address fragment to look for inside the URLs of the returned pages.</param>
        /// <returns>
        /// The statistics object created for this call. Request, position and detection time are
        /// filled in only when a matching page was found; otherwise the object is returned as created.
        /// </returns>
        /// <exception cref="UserException">The data provider task did not complete successfully.</exception>
        /// <exception cref="InvalidOperationException">
        /// More than one returned page matches the target (raised by <c>SingleOrDefault</c>).
        /// </exception>
        private SerpModuleRequestStats FindRequestPosition(SerpModuleRequest request, IRuntimeModule <string, List <SerpWebPage> > dataProvider, string targetPage)
        {
            var requestStats = new SerpModuleRequestStats();

            Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request..."]);

            var result = RuntimeTask.Run(this.Context, dataProvider, request.Text);

            // Only a failed provider run is an error. Previously the throw below was reached
            // unconditionally, so the collected statistics could never be returned.
            if (!result.IsSuccessfully)
            {
                throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
            }

            // NOTE(review): only the target is lower-cased, the page URL is compared as-is;
            // confirm that provider URLs are already lower-case.
            var target = result.Data.Where(page => page.Url.Contains(targetPage.ToLower())).SingleOrDefault();
            if (target != null)
            {
                requestStats.Request       = request;
                requestStats.Position      = target.Position;
                requestStats.DetectionTime = DateTime.Now;

                Context.LogInform(Context.Localizer[$"Request '{request.Text}' found. Position : {target.Position}"]);
            }
            else
            {
                Context.LogInform(Context.Localizer[$"Request '{request.Text}' not found"]);
            }

            Context.LogInform(Context.Localizer[$"Analysis position of '{request.Text}' request was completed"]);

            return(requestStats);
        }
예제 #2
0
        /// <summary>
        /// Builds the Yandex XML search URL for the given request text and downloads the
        /// response through the supplied HTTP module.
        /// </summary>
        /// <param name="httpModule">HTTP runtime module used to execute the query; must not be null.</param>
        /// <param name="request">Search text; it is URL-escaped before being placed into the query string.</param>
        /// <param name="page">Optional page number; appended to the query only when it has a value greater than zero.</param>
        /// <returns>The content stream of the HTTP response.</returns>
        /// <exception cref="NullReferenceException"><paramref name="httpModule"/> is null.</exception>
        /// <exception cref="UserException">The HTTP task did not complete successfully.</exception>
        private Stream FetchData(IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, string request, int?page)
        {
            if (httpModule == null)
            {
                throw new NullReferenceException("Http module");
            }

            this.Context.LogInform(this.Context.Localizer[$"Request execution '{request}'"]);

            StringBuilder queryBuilder = new StringBuilder();

            // No leading '?': UriBuilder.Query adds it itself, and older runtimes would
            // otherwise emit "??".
            // NOTE(review): User and Key are inserted as-is because their types are not
            // visible here; confirm they never contain characters that need escaping.
            queryBuilder.Append($"user={Configuration.User}&key={Configuration.Key}");

            // The search text is free-form, so characters such as '&', '#', '+' or spaces
            // must be escaped or they would corrupt the query string.
            queryBuilder.Append("&query=").Append(Uri.EscapeDataString(request ?? string.Empty));

            if (!string.IsNullOrEmpty(Configuration.Region))
            {
                queryBuilder.Append("&lr=").Append(Uri.EscapeDataString(Configuration.Region));
            }

            if (!string.IsNullOrEmpty(Configuration.Filter))
            {
                queryBuilder.Append("&filter=").Append(Uri.EscapeDataString(Configuration.Filter));
            }

            if (page.HasValue && page.Value > 0)
            {
                queryBuilder.Append($"&page={page.Value}");
            }

            UriBuilder uri = new UriBuilder();

            // Host and path are set separately; the path does not belong in Host.
            uri.Scheme = "http";
            uri.Host   = "yandex.ru";
            uri.Path   = "/search/xml";
            uri.Query  = queryBuilder.ToString();

            var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig()
            {
                Query = uri.ToString()
            });

            if (result.IsSuccessfully)
            {
                return(result.Data.GetContent());
            }

            throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, httpModule.Name]);
        }
예제 #3
0
        /// <summary>
        /// Downloads and parses the configured number of result pages for the given request
        /// text and returns all parsed pages in one list.
        /// </summary>
        /// <param name="config">Request text forwarded to <c>FetchData</c>.</param>
        /// <returns>Pages collected from every fetched result page, in fetch order.</returns>
        /// <exception cref="NullReferenceException">The configured HTTP module id is not found in the repository.</exception>
        /// <exception cref="UserException">The parser task did not complete successfully for a fetched page.</exception>
        public List <SerpWebPage> Run(string config)
        {
            var collectedPages = new List <SerpWebPage>();

            // One request is made when no page limit is configured.
            int requestCount = Configuration.PageLimit ?? 1;

            var moduleRegistry = this.Context.Provider.GetService <IProjectModuleRegistry>();

            var serpParser = new XmlYandexSerpParser();

            IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpProvider = null;

            if (Configuration.HttpModuleId.HasValue)
            {
                var moduleRecord = Context.Repository.GetModule(Configuration.HttpModuleId.Value);

                if (moduleRecord == null)
                {
                    throw new NullReferenceException("Http Module module");
                }

                httpProvider = moduleRegistry.OpenModule(Context, moduleRecord.Code, moduleRecord.Id) as IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData>;
            }

            for (int pageIndex = 0; pageIndex < requestCount; pageIndex++)
            {
                using (var responseStream = FetchData(httpProvider, config, pageIndex))
                {
                    var parsed = RuntimeTask.Run(Context, serpParser, responseStream);

                    // Any parser failure aborts the whole run.
                    if (!parsed.IsSuccessfully)
                    {
                        throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, serpParser.Name]);
                    }

                    collectedPages.AddRange(parsed.Data);
                }
            }

            return(collectedPages);
        }
예제 #4
0
        /// <summary>
        /// Downloads up to the first ten pages from the configuration, tokenizes each one,
        /// runs the lexical text analyzer over the tokens and passes the collected
        /// statistics to <c>Analyze</c>.
        /// </summary>
        /// <param name="config">Supplies the pages to process and the HTTP module used to download them.</param>
        /// <returns>The value produced by <c>Analyze</c> for the collected per-page statistics.</returns>
        public LexModuleStats Run(MedianFrequencyAnalyserConfig config)
        {
            var collectedStats = new List <LexicalTextAnalyzerStatistics>();

            var htmlLexer    = new HtmlLexer();
            var textAnalyzer = new LexicalTextAnalyzer();

            foreach (var page in config.Pages.Take(10))
            {
                var download = RuntimeTask.Run(Context, config.HttpModule, new HttpProviderRuntimeConfig()
                {
                    Query = page.Url.ToString()
                });

                // Pages that cannot be downloaded are skipped.
                if (!download.IsSuccessfully)
                {
                    continue;
                }

                using (var content = download.Data.GetContent())
                {
                    var tokenized = RuntimeTask.Run(Context, htmlLexer, content);

                    if (tokenized.Details.Result == ActionExecutionResult.Error)
                    {
                        continue;
                    }

                    var analyzed = RuntimeTask.Run(Context, textAnalyzer, new LexicalTextAnalyzerConfig()
                    {
                        Page   = page,
                        Tokens = tokenized.Data,
                        // Only tokens whose value has more than three elements are analyzed.
                        Filter = token => token.Value.Count() > 3
                    });

                    if (analyzed.Details.Result != ActionExecutionResult.Error)
                    {
                        collectedStats.Add(analyzed.Data);
                    }
                }
            }

            return(Analyze(collectedStats));
        }
예제 #5
0
        /// <summary>
        /// Posts a create-task request, serialized as JSON, to the anti-captcha endpoint
        /// through the supplied HTTP module and deserializes the response.
        /// </summary>
        /// <param name="task">Task description placed into the request object.</param>
        /// <param name="httpModule">HTTP runtime module used to send the request.</param>
        /// <returns>The deserialized response, or null when the HTTP task reports an error result.</returns>
        public AntigateCreateTaskResponse CreatTask(AntigateTaskConfig task, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule)
        {
            var taskRequest = new AntigateCreateTaskRequest(Configuration.ClientKey, Configuration.LanguagePool, Configuration.SoftId, Configuration.CallbackUrl)
            {
                Task = task
            };

            var postConfig = new HttpProviderRuntimeConfig()
            {
                Query   = "https://api.anti-captcha.com/createTask",
                IsPost  = true,
                Content = new StringContent(SerializeObjectToJson(taskRequest), Encoding.UTF8, "application/json")
            };

            var response = RuntimeTask.Run(Context, httpModule, postConfig);

            if (response.Details.Result == ActionExecutionResult.Error)
            {
                return(null);
            }

            using (var body = response.Data.GetContent())
            {
                return(DeserializeJsonToObject <AntigateCreateTaskResponse>(body));
            }
        }
예제 #6
0
        /// <summary>
        /// Tries to download <c>favicon.ico</c> from the host of every page and stores the
        /// downloaded bytes in the page's <c>Favicon</c>.
        /// </summary>
        /// <param name="httpModule">HTTP runtime module used for the downloads; must not be null.</param>
        /// <param name="pages">Pages to enrich; each page is updated in place.</param>
        /// <exception cref="NullReferenceException"><paramref name="httpModule"/> is null.</exception>
        private void FillSerpPageDetails(IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, List <SerpWebPage> pages)
        {
            if (httpModule == null)
            {
                throw new NullReferenceException("httpModule");
            }

            foreach (var page in pages)
            {
                try
                {
                    var serpUrl       = new Uri(page.Url);
                    var targetBuilder = new UriBuilder(serpUrl.Scheme, serpUrl.Host, serpUrl.Port, "favicon.ico");

                    var result = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig()
                    {
                        Query = targetBuilder.Uri.ToString()
                    });

                    if (result.IsSuccessfully)
                    {
                        using (var stream = result.Data.GetContent())
                        using (var buffer = new MemoryStream())
                        {
                            stream.CopyTo(buffer);
                            page.Favicon = buffer.ToArray();
                        }
                    }
                }
                catch (Exception exc)
                {
                    // The favicon is optional, so a failure for one page must not stop the
                    // remaining pages. It is logged instead of being silently discarded.
                    Context.LogInform(Context.Localizer[$"Favicon for '{page?.Url}' was not loaded: {exc.Message}"]);
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Executes the query through the HTTP module and parses the response into SERP items.
        /// </summary>
        /// <param name="query">Query passed to the HTTP module.</param>
        /// <param name="parser">Parser run over the response stream.</param>
        /// <param name="httpModule">HTTP runtime module that executes the query.</param>
        /// <param name="antigateModule">Not used by this method.</param>
        /// <returns>The parsed items, or an empty list when the HTTP task did not succeed.</returns>
        /// <remarks>When the parser task fails, the error stored in its result is thrown.</remarks>
        private List <YandexHtmlSerpItem> ExecuteRequest(string query, YandexHtmlSerpParser parser, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, IRuntimeModule <AntigateTaskConfig, AntigateTaskResult> antigateModule)
        {
            var items = new List <YandexHtmlSerpItem>();

            var response = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig()
            {
                Query = query
            });

            // A failed download yields an empty list rather than an exception.
            if (!response.IsSuccessfully)
            {
                return(items);
            }

            using (var content = response.Data.GetContent())
            {
                var parsed = RuntimeTask.Run(Context, parser, content);
                if (!parsed.IsSuccessfully)
                {
                    throw parsed.Error;
                }

                items.AddRange(parsed.Data);
            }

            return(items);
        }
예제 #8
0
        /// <summary>
        /// Loads the result pages for the request from the configured data provider
        /// (retrying on failure), fills in page details and runs the median frequency
        /// analyser over the pages.
        /// </summary>
        /// <param name="request">Request whose <c>Text</c> is sent to the data provider.</param>
        /// <returns>The statistics produced by the median frequency analyser.</returns>
        /// <exception cref="UserException">
        /// No data provider is configured, the opened module is not a data provider of the
        /// expected kind, the data provider failed on every attempt, or the analyser failed.
        /// </exception>
        /// <exception cref="NullReferenceException">
        /// The configured HTTP module id is not found in the repository, or no HTTP module id
        /// is configured (raised by <c>FillSerpPageDetails</c> for a null module).
        /// </exception>
        private LexModuleStats Analyze(Request request)
        {
            if (Configuration.DataProvider == null)
            {
                throw new UserException("Data provider not specified");
            }
            var registry = this.Context.Provider.GetService <IProjectModuleRegistry>();

            var module = registry.OpenModule(Context, Configuration.DataProvider.Code, Configuration.DataProvider.Id);

            var dataProvider = module as IRuntimeModule <string, List <SerpWebPage> >;

            if (dataProvider == null)
            {
                throw new UserException("Data provider is incorrected");
            }

            IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule = null;

            if (Configuration.HttpModuleId.HasValue)
            {
                var targetModule = Context.Repository.GetModule(Configuration.HttpModuleId.Value);

                if (targetModule == null)
                {
                    throw new NullReferenceException("Http Module module");
                }

                httpModule = registry.OpenModule(Context, targetModule.Code, targetModule.Id) as IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData>;
            }

            ModuleTaskResult <List <SerpWebPage> > dataResult;

            var iteration = 0;

            // The provider is always run at least once. The loop repeats while the run fails
            // and the attempt counter has not exceeded Configuration.RequestAttempt.
            do
            {
                iteration++;
                if (iteration > 1)
                {
                    Context.LogInform(Context.Localizer[$"The attemp {iteration} to get position of '{request.Text}'"]);
                }

                dataResult = RuntimeTask.Run(this.Context, dataProvider, request.Text);

                if (dataResult.IsSuccessfully)
                {
                    break;
                }
            } while (iteration <= Configuration.RequestAttempt);

            if (!dataResult.IsSuccessfully)
            {
                // The failing module is the data provider. The previous code read
                // httpModule.Name, which names the wrong module and dereferences null
                // when no HTTP module is configured.
                throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, dataProvider.Name]);
            }

            FillSerpPageDetails(httpModule, dataResult.Data);

            var analyser = new MedianFrequencyAnalyser();

            var result = RuntimeTask.Run(Context, analyser, new MedianFrequencyAnalyserConfig()
            {
                Pages = dataResult.Data, Request = request, HttpModule = httpModule
            });

            if (result.IsSuccessfully)
            {
                return(result.Data);
            }

            throw new UserException(this.Context.Localizer[RuntimeTask.RuntimeException, analyser.Name]);
        }
예제 #9
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts its title and the target of
        /// a <c>meta http-equiv='refresh'</c> redirect. When a redirect target is found, that
        /// page is downloaded too and its title replaces the first one.
        /// </summary>
        /// <param name="tagParser">Parser used to extract the tags from the downloaded HTML.</param>
        /// <param name="httpModule">HTTP runtime module used for the downloads.</param>
        /// <param name="url">Address of the page to inspect.</param>
        /// <param name="redirectUrl">Receives the redirect target, or an empty string when none was found.</param>
        /// <param name="title">Receives the page title, or an empty string when none was found.</param>
        private void TryExtractDetaisl(HtmlTagPageParser tagParser, IRuntimeModule <HttpProviderRuntimeConfig, HttpProviderData> httpModule, string url, out string redirectUrl, out string title)
        {
            redirectUrl = string.Empty;
            title       = string.Empty;

            var httpResult = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig()
            {
                Query = url
            });

            if (httpResult.IsSuccessfully)
            {
                var extractor = new HtmlTextAttributeParser()
                {
                    Attribute  = "content",
                    PostHandle = (value) =>
                    {
                        if (string.IsNullOrEmpty(value))
                        {
                            return(value);
                        }

                        const string urlTag = "URL=";

                        // The marker is matched ignoring case because refresh content is
                        // commonly written as "url=" as well as "URL=". Ordinal comparison
                        // keeps the search independent of the current culture.
                        int startIndex = value.IndexOf(urlTag, StringComparison.OrdinalIgnoreCase);
                        if (startIndex != -1)
                        {
                            // Keep everything after the marker, then drop surrounding
                            // whitespace and either style of quote.
                            value = value.Substring(startIndex + urlTag.Length);
                            value = value.Trim().Trim('\'', '"');
                        }
                        return(value);
                    }
                };

                var tagConfig = new HtmlTagPageConfig();
                tagConfig.Tags.Add("redirecturl", new SinglHtmlNodeSelector()
                {
                    Tag = new TagProperties()
                    {
                        Path = @"//meta[@http-equiv='refresh']", Extractor = extractor
                    }
                });
                tagConfig.Tags.Add("title", new SinglHtmlNodeSelector()
                {
                    Tag = new TagProperties()
                    {
                        Path = @"//title", Extractor = new HtmlTagInnerText()
                    }
                });

                using (var stream = httpResult.Data.GetContent())
                {
                    tagConfig.Stream = stream;

                    var parseResult = RuntimeTask.Run(Context, tagParser, tagConfig);
                    if (parseResult.IsSuccessfully)
                    {
                        if (parseResult.Data.Values["redirecturl"] != null)
                        {
                            redirectUrl = (string)parseResult.Data.Values["redirecturl"];
                        }

                        if (parseResult.Data.Values["title"] != null)
                        {
                            title = (string)parseResult.Data.Values["title"];
                        }
                    }
                }

                if (!string.IsNullOrEmpty(redirectUrl))
                {
                    httpResult = RuntimeTask.Run(Context, httpModule, new HttpProviderRuntimeConfig()
                    {
                        Query = redirectUrl
                    });
                    if (httpResult.IsSuccessfully)
                    {
                        using (var stream = httpResult.Data.GetContent())
                        {
                            tagConfig.Stream = stream;

                            // Run the parser the same way as for the first page, so a parse
                            // failure on the redirect target leaves the first title in place
                            // instead of escaping from this method.
                            var redirectParseResult = RuntimeTask.Run(Context, tagParser, tagConfig);

                            if (redirectParseResult.IsSuccessfully && redirectParseResult.Data.Values["title"] != null)
                            {
                                title = (string)redirectParseResult.Data.Values["title"];
                            }
                        }
                    }
                }
            }
        }