/// <summary>
/// Walks every item in <c>container.Data</c>, parses its string form as HTML,
/// selects the nodes matching the <c>tag</c> field, and records each non-blank
/// formatted node value on the container under the <c>name</c> field.
/// Finally hands control to <c>container.NextAsync()</c>.
/// </summary>
/// <param name="container">Container supplying the data items and receiving the attributes.</param>
/// <returns>A task that completes once <c>container.NextAsync()</c> has completed.</returns>
public async Task RenderAsync(ISpiderContainer container)
{
    // A single document instance is reloaded for each item.
    var document = new HtmlDocument();
    foreach (var entry in container.Data)
    {
        document.LoadHtml(entry.ToString());

        // NOTE(review): tag is presumably an XPath expression — confirm against the field's declaration.
        var matches = document.DocumentNode.SelectNodes(tag);
        if (matches != null && matches.Count > 0)
        {
            foreach (var candidate in matches)
            {
                if (candidate != null)
                {
                    var text = XPathRule.FormatNode(candidate, tagFunc);

                    // Blank results are ignored; SetAttribute is called once per usable value.
                    if (!string.IsNullOrWhiteSpace(text))
                    {
                        container.SetAttribute(name, text);
                    }
                }
            }
        }
    }

    await container.NextAsync();
}
/// <summary>
/// Applies the regular expression in the <c>pattern</c> field to the string form of
/// <c>container.Data</c>. When the pattern matches, the whole match is stored under the
/// <c>name</c> field (if that is non-empty) and every group reported by
/// <c>Regex.GetGroupNames()</c> is stored under its own group name. In every case
/// <c>container.NextAsync()</c> is awaited before returning.
/// </summary>
/// <param name="container">Container supplying the input data and receiving the attributes.</param>
/// <returns>A task that completes once <c>container.NextAsync()</c> has completed.</returns>
public async Task RenderAsync(ISpiderContainer container)
{
    var regex = new Regex(pattern);
    var match = regex.Match(container.Data.ToString());

    // Regex.Match never returns null; an unsuccessful match is signalled through
    // Match.Success. Checking it keeps empty attribute values from being written
    // when the pattern does not match.
    if (!match.Success)
    {
        await container.NextAsync();
        return;
    }

    if (!string.IsNullOrEmpty(name))
    {
        container.SetAttribute(name, match.Value);
    }

    // GetGroupNames() also reports the implicit group "0" (the whole match).
    foreach (var tag in regex.GetGroupNames())
    {
        container.SetAttribute(tag, match.Groups[tag].Value);
    }

    await container.NextAsync();
}
/// <summary>
/// Walks every item in <c>container.Data</c>, opens its string form as a document in a
/// browsing context, takes the first element matching the <c>tag</c> field, and records
/// its non-blank formatted value on the container under the <c>name</c> field.
/// Finally hands control to <c>container.NextAsync()</c>.
/// </summary>
/// <param name="container">Container supplying the data items and receiving the attributes.</param>
/// <returns>A task that completes once <c>container.NextAsync()</c> has completed.</returns>
public async Task RenderAsync(ISpiderContainer container)
{
    var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
    foreach (var item in container.Data)
    {
        // Each opened document is disposable; release it as soon as the value has been
        // extracted so documents do not pile up across items.
        using (var doc = await context.OpenAsync(req => req.Content(item.ToString())))
        {
            // NOTE(review): tag is presumably a CSS selector — confirm against the field's declaration.
            var node = doc.QuerySelector(tag);
            if (node == null)
            {
                continue;
            }

            var val = JQueryRule.FormatNode(node, tagFunc);
            if (string.IsNullOrWhiteSpace(val))
            {
                continue;
            }

            container.SetAttribute(name, val);
        }
    }

    await container.NextAsync();
}