/// <summary>
/// Builds results from the response body by running the selector's query against the parsed document.
/// </summary>
/// <remarks>
/// In single mode (<c>selector.IsSingle</c> is true) one result named <c>selector.Name</c> is produced from
/// the <c>OuterHtml</c> of the first matching node, from an empty string when nothing matches, or from the
/// whole body when the query is null or empty. Otherwise one result is produced per matching node, named
/// <c>{selector.Name}[index]</c>. Every result is created through <c>GetScrapyResult</c>.
/// </remarks>
/// <param name="response">Response whose <c>BodyContent</c> is parsed and whose <c>HttpHeaders</c> are handed to each result.</param>
/// <param name="selector">Selector supplying the query, the result name and the single/multiple mode.</param>
/// <returns>The results in the order the matching nodes were returned by the query.</returns>
public List <ScrapyResult> Process(ScrapyResponse response, ScrapySelector selector)
        {
            var collected = new List <ScrapyResult>();

            if (selector.IsSingle != true)
            {
                // Multi mode: one result per matched node, suffixed with its position.
                var matches = _htmlParser.Parse(response.BodyContent).QuerySelectorAll(selector.Query);

                for (var position = 0; position < matches.Length; position++)
                {
                    var match = matches[position];
                    var indexedName = $"{selector.Name}[{position}]";
                    collected.Add(GetScrapyResult(indexedName, match.OuterHtml, response.HttpHeaders, selector));
                }

                return collected;
            }

            // Single mode: start from the whole body and narrow it only when a query is given.
            var content = response.BodyContent;

            if (!string.IsNullOrEmpty(selector.Query))
            {
                var found = _htmlParser.Parse(response.BodyContent).QuerySelectorAll(selector.Query);

                if (found.Length > 0)
                {
                    content = found[0].OuterHtml;
                }
                else
                {
                    content = "";
                }
            }

            collected.Add(GetScrapyResult(selector.Name, content, response.HttpHeaders, selector));

            return collected;
        }
Пример #2
0
        /// <summary>
        /// Runs the selector against the response and wraps the outcome in a single root result.
        /// </summary>
        /// <param name="response">Response passed to <c>GetResults</c>.</param>
        /// <param name="selector">Selector passed to <c>GetResults</c> and <c>GetParameters</c>; its <c>Name</c> becomes the root result's name.</param>
        /// <returns>
        /// A new <c>ScrapyResult</c> whose <c>SubResults</c> are the results from <c>GetResults</c> and whose
        /// <c>Parameters</c> are the dictionary built from them by <c>GetParameters</c>.
        /// </returns>
        public ScrapyResult Process(ScrapyResponse response, ScrapySelector selector)
        {
            var subResults = GetResults(response, selector);
            var extractedParameters = GetParameters(subResults, selector);

            var root = new ScrapyResult
            {
                SubResults = subResults,
                Parameters = extractedParameters,
                Name = selector.Name
            };

            return root;
        }
Пример #3
0
        /// <summary>
        /// Turns every HTTP header of the response into its own result.
        /// </summary>
        /// <param name="response">Response whose <c>HttpHeaders</c> are enumerated.</param>
        /// <param name="selector">Selector whose <c>ResultType</c> is copied onto each result.</param>
        /// <returns>One result per header, carrying the header's name and value, in enumeration order.</returns>
        public List <ScrapyResult> Process(ScrapyResponse response, ScrapySelector selector)
        {
            var headerResults = new List <ScrapyResult>();

            foreach (var httpHeader in response.HttpHeaders)
            {
                var entry = new ScrapyResult
                {
                    Name       = httpHeader.Name,
                    Value      = httpHeader.Value,
                    ResultType = selector.ResultType
                };

                headerResults.Add(entry);
            }

            return headerResults;
        }
Пример #4
0
        /// <summary>
        /// Produces the results for a selector and, recursively, for all of its sub-selectors.
        /// </summary>
        /// <remarks>
        /// The processor is looked up in <c>_scrapySourceProcessorBuilder.Processors</c> by the selector's
        /// <c>SourceType</c>. Each sub-selector is then applied to every result's <c>ProcessedResponse</c>,
        /// and the nested results are appended to that result's <c>SubResults</c>.
        /// </remarks>
        /// <param name="response">Response handed to the source processor.</param>
        /// <param name="selector">Selector to evaluate, together with its <c>SubSelectors</c>.</param>
        /// <returns>The results of this selector, each populated with the results of the sub-selectors.</returns>
        private List <ScrapyResult> GetResults(ScrapyResponse response, ScrapySelector selector)
        {
            // Pick the processor registered for this selector's source type.
            var processor = _scrapySourceProcessorBuilder.Processors[selector.SourceType];

            var results = processor.Process(response, selector).ToList();

            // Any() only asks whether at least one sub-selector exists instead of counting them all.
            if (!selector.SubSelectors.Any())
            {
                return results;
            }

            foreach (var result in results)
            {
                foreach (var subSelector in selector.SubSelectors)
                {
                    // Sub-selectors work on the parent's processed output, not on the original response.
                    var subResults = GetResults(result.ProcessedResponse, subSelector);
                    result.SubResults.AddRange(subResults);
                }
            }

            return results;
        }
Пример #5
0
        /// <summary>
        /// Flattens a tree of results into a dictionary keyed by dot-separated result names.
        /// </summary>
        /// <remarks>
        /// A result contributes its own <c>Value</c> only when <c>selector.IsParameter</c> is true. Its
        /// <c>SubResults</c> are visited once per sub-selector, using the result's qualified name as the new
        /// prefix. Entries written later overwrite earlier entries that share the same key.
        /// </remarks>
        /// <param name="results">Results to flatten.</param>
        /// <param name="selector">Selector that controls whether these results are parameters and which sub-selectors to descend into.</param>
        /// <param name="prefix">Qualified name of the parent result; null or empty at the root.</param>
        /// <returns>The collected parameter names and values.</returns>
        private Dictionary <string, string> GetParameters(IEnumerable <ScrapyResult> results, ScrapySelector selector, string prefix = "")
        {
            var collected = new Dictionary <string, string>();

            // The qualifier depends only on the prefix, so it is built once for all results.
            var qualifier = string.IsNullOrEmpty(prefix) ? "" : $"{prefix}.";

            foreach (var entry in results)
            {
                var qualifiedName = $"{qualifier}{entry.Name}";

                if (selector.IsParameter == true)
                {
                    collected[qualifiedName] = entry.Value;
                }

                foreach (var childSelector in selector.SubSelectors)
                {
                    var nested = GetParameters(entry.SubResults, childSelector, qualifiedName);

                    foreach (var pair in nested)
                    {
                        collected[pair.Key] = pair.Value;
                    }
                }
            }

            return collected;
        }
 /// <summary>
 /// Not implemented: this method always throws and never produces results.
 /// </summary>
 /// <param name="content">Not used.</param>
 /// <param name="selector">Not used.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public List <ScrapyResult> Process(ScrapyResponse content, ScrapySelector selector)
 {
     throw new NotImplementedException();
 }
        /// <summary>
        /// Creates a result from a piece of content, optionally narrowing it with the selector's regex.
        /// </summary>
        /// <remarks>
        /// Without a regex the value is the content itself. With a regex the value is the text captured by the
        /// group named after <c>selector.Name</c> (not the <paramref name="name"/> argument) in the first match.
        /// A null or empty value is replaced by <c>selector.DefaultValue</c>. The final value and the headers
        /// are wrapped in a new <c>ScrapyResponse</c> stored in <c>ProcessedResponse</c>.
        /// </remarks>
        /// <param name="name">Name assigned to the result.</param>
        /// <param name="content">Raw content to extract the value from; null is treated as empty when a regex is applied.</param>
        /// <param name="headers">Headers carried over into the result's <c>ProcessedResponse</c>.</param>
        /// <param name="selector">Selector supplying the result type, regex and default value.</param>
        /// <returns>The populated result.</returns>
        private ScrapyResult GetScrapyResult(string name, string content, List <HttpHeader> headers, ScrapySelector selector)
        {
            var result = new ScrapyResult()
            {
                Name = name, ResultType = selector.ResultType
            };

            if (string.IsNullOrEmpty(selector.Regex))
            {
                result.Value = content;
            }
            else
            {
                // Regex.Match throws on a null input; treating null as empty lets the default-value
                // fallback below apply instead. The static overload reuses the framework's cached
                // regex for the pattern rather than building a new Regex on every call.
                var match = Regex.Match(content ?? string.Empty, selector.Regex);
                result.Value = match.Groups[selector.Name].Value;
            }

            if (string.IsNullOrEmpty(result.Value))
            {
                result.Value = selector.DefaultValue;
            }

            result.ProcessedResponse = new ScrapyResponse(result.Value, headers);

            return result;
        }