/// <summary>
/// Builds results from the response body by running the selector's query through the HTML parser.
/// </summary>
/// <param name="response">Response whose <c>BodyContent</c> is parsed and whose <c>HttpHeaders</c> are forwarded to every result.</param>
/// <param name="selector">Selector providing <c>IsSingle</c>, <c>Query</c> and <c>Name</c>.</param>
/// <returns>
/// A single result named after the selector when <c>IsSingle</c> is true; otherwise one result per
/// matched node, named <c>Name[index]</c>.
/// </returns>
public List<ScrapyResult> Process(ScrapyResponse response, ScrapySelector selector)
{
    var collected = new List<ScrapyResult>();

    if (selector.IsSingle != true)
    {
        // Multi mode: each node matched by the query becomes its own indexed result.
        var matchedNodes = _htmlParser.Parse(response.BodyContent).QuerySelectorAll(selector.Query);
        for (int i = 0; i < matchedNodes.Length; i++)
        {
            var indexedName = $"{selector.Name}[{i}]";
            collected.Add(GetScrapyResult(indexedName, matchedNodes[i].OuterHtml, response.HttpHeaders, selector));
        }

        return collected;
    }

    // Single mode: with no query the whole body is the value; with a query it is the
    // first match's outer HTML, or an empty string when nothing matched.
    var content = response.BodyContent;
    if (!string.IsNullOrEmpty(selector.Query))
    {
        var hits = _htmlParser.Parse(response.BodyContent).QuerySelectorAll(selector.Query);
        if (hits.Length > 0)
        {
            content = hits[0].OuterHtml;
        }
        else
        {
            content = "";
        }
    }

    collected.Add(GetScrapyResult(selector.Name, content, response.HttpHeaders, selector));
    return collected;
}
/// <summary>
/// Runs the selector against the response and wraps the outcome in one parent result.
/// </summary>
/// <param name="response">Response handed to <c>GetResults</c>.</param>
/// <param name="selector">Selector that drives extraction and whose <c>Name</c> names the returned result.</param>
/// <returns>
/// A result whose <c>SubResults</c> come from <c>GetResults</c> and whose <c>Parameters</c>
/// come from <c>GetParameters</c> over those results.
/// </returns>
public ScrapyResult Process(ScrapyResponse response, ScrapySelector selector)
{
    var extracted = GetResults(response, selector);
    var extractedParameters = GetParameters(extracted, selector);

    return new ScrapyResult
    {
        SubResults = extracted,
        Parameters = extractedParameters,
        Name = selector.Name
    };
}
/// <summary>
/// Turns every HTTP header of the response into a result.
/// </summary>
/// <param name="response">Response whose <c>HttpHeaders</c> are enumerated.</param>
/// <param name="selector">Selector whose <c>ResultType</c> is copied onto each result.</param>
/// <returns>One result per header, carrying the header's name and value.</returns>
public List<ScrapyResult> Process(ScrapyResponse response, ScrapySelector selector)
{
    var headerResults = new List<ScrapyResult>();

    foreach (var httpHeader in response.HttpHeaders)
    {
        var entry = new ScrapyResult
        {
            Name = httpHeader.Name,
            Value = httpHeader.Value,
            ResultType = selector.ResultType
        };
        headerResults.Add(entry);
    }

    return headerResults;
}
/// <summary>
/// Processes the response with the processor registered for the selector's source type, then
/// recursively applies each sub-selector to every result's processed response.
/// </summary>
/// <param name="response">Response passed to the source processor.</param>
/// <param name="selector">Selector providing <c>SourceType</c> and <c>SubSelectors</c>.</param>
/// <returns>
/// The processor's results, each with the results of all sub-selectors appended to its
/// <c>SubResults</c>.
/// </returns>
private List<ScrapyResult> GetResults(ScrapyResponse response, ScrapySelector selector)
{
    // Look up the processor registered for this selector's source type.
    var processor = _scrapySourceProcessorBuilder.Processors[selector.SourceType];
    var results = processor.Process(response, selector).ToList();

    // Any() states the intent directly and does not need to count the whole sequence.
    if (selector.SubSelectors.Any())
    {
        foreach (var result in results)
        {
            foreach (var subSelector in selector.SubSelectors)
            {
                // NOTE(review): this relies on result.ProcessedResponse and result.SubResults
                // being non-null for every processor's results — confirm against ScrapyResult
                // and the individual processors.
                var subResults = GetResults(result.ProcessedResponse, subSelector);
                result.SubResults.AddRange(subResults);
            }
        }
    }

    return results;
}
/// <summary>
/// Flattens a result tree into a dictionary keyed by dot-separated result names.
/// </summary>
/// <param name="results">Results to walk at this level.</param>
/// <param name="selector">Selector for this level; its <c>IsParameter</c> flag decides whether values are recorded, and its <c>SubSelectors</c> drive the recursion.</param>
/// <param name="prefix">Name of the parent entry; when non-empty it is prepended, followed by a dot, to each result name.</param>
/// <returns>Parameter name/value pairs for this level and all nested levels; later entries overwrite earlier ones with the same key.</returns>
private Dictionary<string, string> GetParameters(IEnumerable<ScrapyResult> results, ScrapySelector selector, string prefix = "")
{
    var collected = new Dictionary<string, string>();

    // The qualifier depends only on the prefix, so it is computed once up front.
    var qualifier = string.IsNullOrEmpty(prefix) ? "" : $"{prefix}.";

    foreach (var current in results)
    {
        var qualifiedName = $"{qualifier}{current.Name}";

        if (selector.IsParameter == true)
        {
            collected[qualifiedName] = current.Value;
        }

        // NOTE(review): every sub-selector is applied to the full SubResults list, not only
        // to the results it produced — confirm this is intended.
        foreach (var childSelector in selector.SubSelectors)
        {
            foreach (var pair in GetParameters(current.SubResults, childSelector, qualifiedName))
            {
                collected[pair.Key] = pair.Value;
            }
        }
    }

    return collected;
}
/// <summary>
/// Placeholder processor: not implemented.
/// </summary>
/// <param name="content">Unused.</param>
/// <param name="selector">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public List<ScrapyResult> Process(ScrapyResponse content, ScrapySelector selector)
{
    throw new NotImplementedException();
}
/// <summary>
/// Creates a result for the given content, optionally narrowing it with the selector's regex
/// and falling back to the selector's default value.
/// </summary>
/// <param name="name">Name assigned to the result.</param>
/// <param name="content">Raw content; used as-is when the selector has no regex.</param>
/// <param name="headers">Headers attached to the result's processed response.</param>
/// <param name="selector">Selector providing <c>ResultType</c>, <c>Regex</c>, <c>Name</c> and <c>DefaultValue</c>.</param>
/// <returns>
/// A result whose <c>Value</c> is the content, or the regex group named after the selector,
/// or <c>DefaultValue</c> when that is null or empty; <c>ProcessedResponse</c> wraps the final
/// value together with <paramref name="headers"/>.
/// </returns>
private ScrapyResult GetScrapyResult(string name, string content, List<HttpHeader> headers, ScrapySelector selector)
{
    var result = new ScrapyResult() { Name = name, ResultType = selector.ResultType };

    if (string.IsNullOrEmpty(selector.Regex))
    {
        result.Value = content;
    }
    else
    {
        // The static Regex.Match goes through the runtime's regex cache, so the same pattern
        // is not re-parsed for every node. Null content is matched as an empty string so it
        // falls through to the default value instead of throwing ArgumentNullException.
        var match = Regex.Match(content ?? string.Empty, selector.Regex);

        // The captured value is taken from the group named after the selector; a group that
        // is missing or did not participate yields an empty string.
        result.Value = match.Groups[selector.Name].Value;
    }

    if (string.IsNullOrEmpty(result.Value))
    {
        result.Value = selector.DefaultValue;
    }

    result.ProcessedResponse = new ScrapyResponse(result.Value, headers);
    return result;
}