/// <summary>
/// Reads a single value for <paramref name="field"/> from the node selected by
/// <c>field.XPath</c>, evaluated against <paramref name="hn"/>.
/// </summary>
/// <param name="hn">Node the XPath is evaluated against; may be null.</param>
/// <param name="field">Field definition supplying <c>XPath</c>, <c>Attr</c> and <c>IsHtml</c>.</param>
/// <returns>
/// An empty string when <paramref name="hn"/> is null or the XPath selects nothing.
/// Otherwise, in priority order: the result of <c>GetHtmlNodeAttributeValue</c> when
/// <c>field.Attr</c> is non-empty; the node's <c>InnerHtml</c> when <c>field.IsHtml</c> is set;
/// else the node's <c>InnerText</c> passed through <c>StringUtil.RemoveHTML</c>.
/// </returns>
private string GetHtmlNodeValue(HtmlAgilityPack.HtmlNode hn, ThinkCrawlField field)
{
    // Nothing to query against.
    if (hn == null)
    {
        return "";
    }

    var match = hn.SelectSingleNode(field.XPath);
    if (match == null)
    {
        return "";
    }

    // An explicit attribute name takes precedence over the node's content.
    if (!string.IsNullOrEmpty(field.Attr))
    {
        return GetHtmlNodeAttributeValue(match, field.Attr);
    }

    if (field.IsHtml)
    {
        return match.InnerHtml;
    }

    return StringUtil.RemoveHTML(match.InnerText);
}
/// <summary>
/// Extracts the value described by <paramref name="field"/> from the HTML tree and stores it
/// on <paramref name="data"/> under <c>field.Name</c> via <c>SetDynamicValue</c>.
/// </summary>
/// <remarks>
/// <para>
/// <c>Single</c> fields store the string returned by <c>GetHtmlNodeValue</c>, evaluated against
/// <paramref name="parentNode"/> when <c>field.Inherit</c> is set and against
/// <paramref name="root"/> otherwise.
/// </para>
/// <para>
/// <c>Group</c> fields store a <c>List&lt;dynamic&gt;</c> holding one <c>ExpandoObject</c> per
/// node matched by <c>field.XPath</c> under <paramref name="root"/>; each object is filled by
/// recursing over <c>field.Children</c> with the matched node as the parent node.
/// </para>
/// <para>Any other field type stores nothing.</para>
/// </remarks>
/// <param name="field">Field definition (name, XPath, type, inherit flag, children).</param>
/// <param name="data">Dynamic object that receives the extracted value.</param>
/// <param name="root">Root node; used for non-inherited single fields and for group selection.</param>
/// <param name="parentNode">Node of the enclosing group item, used by inherited single fields.</param>
private void ExpandFieldData(ThinkCrawlField field, dynamic data, HtmlAgilityPack.HtmlNode root, HtmlAgilityPack.HtmlNode parentNode = null)
{
    string name = field.Name;
    string fieldXPath = field.XPath;

    // A field without a name or an XPath cannot be extracted: store an empty value and stop.
    // The original fell through here and ran the XPath query with the empty expression anyway.
    if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(fieldXPath))
    {
        SetDynamicValue(data, name, "");
        return;
    }

    if (field.Type == ThinkCrawlFieldType.Single)
    {
        // GetHtmlNodeValue returns "" for a null node, so an inherited field
        // without a parent node yields an empty string.
        SetDynamicValue(data, name, GetHtmlNodeValue(field.Inherit ? parentNode : root, field));
    }
    else if (field.Type == ThinkCrawlFieldType.Group)
    {
        List<dynamic> childList = new List<dynamic>();

        // NOTE(review): groups are always selected from root, even when a parentNode is
        // supplied (i.e. for a group nested inside another group) - confirm this is intended.
        // A null root is treated as "no matches", consistent with GetHtmlNodeValue.
        var nodes = root == null ? null : root.SelectNodes(fieldXPath);
        if (nodes != null)
        {
            foreach (var node in nodes)
            {
                dynamic childData = new ExpandoObject();

                // A group without child definitions yields one empty object per matched node.
                if (field.Children != null)
                {
                    foreach (var childField in field.Children)
                    {
                        ExpandFieldData(childField, childData, root, node);
                    }
                }

                childList.Add(childData);
            }
        }

        SetDynamicValue(data, name, childList);
    }
}