Example #1
0
        /// <summary>
        /// Reads one string value for <paramref name="field"/> from the first node that
        /// <c>field.XPath</c> selects under <paramref name="hn"/>.
        /// </summary>
        /// <param name="hn">Node the XPath query is evaluated against; may be null.</param>
        /// <param name="field">Field definition supplying <c>XPath</c>, <c>Attr</c> and <c>IsHtml</c>.</param>
        /// <returns>
        /// The result of <c>GetHtmlNodeAttributeValue</c> when <c>field.Attr</c> is non-empty;
        /// otherwise the node's inner HTML when <c>field.IsHtml</c> is true; otherwise the node's
        /// inner text passed through <c>StringUtil.RemoveHTML</c>. An empty string is returned when
        /// <paramref name="hn"/> is null, <c>field.XPath</c> is null or empty, or no node matches.
        /// </returns>
        private string GetHtmlNodeValue(HtmlAgilityPack.HtmlNode hn, ThinkCrawlField field)
        {
            // No node to search, or no query to run. An empty expression is not a valid XPath
            // query, so fall back to the same default used when nothing matches instead of
            // letting SelectSingleNode throw.
            if (hn == null || string.IsNullOrEmpty(field.XPath))
            {
                return "";
            }

            var node = hn.SelectSingleNode(field.XPath);
            if (node == null)
            {
                return "";
            }

            // An explicit attribute name takes precedence over the node's content.
            if (!string.IsNullOrEmpty(field.Attr))
            {
                return GetHtmlNodeAttributeValue(node, field.Attr);
            }

            if (field.IsHtml)
            {
                return node.InnerHtml;
            }

            return StringUtil.RemoveHTML(node.InnerText);
        }
Example #2
0
        /// <summary>
        /// Evaluates <paramref name="field"/> against the HTML tree and stores the result on
        /// <paramref name="data"/> under <c>field.Name</c> via <c>SetDynamicValue</c>.
        /// </summary>
        /// <param name="field">Field definition (name, XPath, type, inherit flag, children).</param>
        /// <param name="data">Dynamic object that receives the extracted value.</param>
        /// <param name="root">Node used for group selection and for non-inherited single fields.</param>
        /// <param name="parentNode">
        /// Node used instead of <paramref name="root"/> for single fields whose <c>Inherit</c>
        /// flag is set; during group expansion this is the current group node.
        /// </param>
        /// <remarks>
        /// A field with an empty name or XPath is stored as an empty string and not evaluated.
        /// A <c>Single</c> field stores one string from <c>GetHtmlNodeValue</c>. A <c>Group</c>
        /// field stores a list with one <see cref="ExpandoObject"/> per node matched under
        /// <paramref name="root"/>, each populated by recursing over <c>field.Children</c>;
        /// the list is empty when nothing matches.
        /// </remarks>
        private void ExpandFieldData(ThinkCrawlField field, dynamic data, HtmlAgilityPack.HtmlNode root, HtmlAgilityPack.HtmlNode parentNode = null)
        {
            string name       = field.Name;
            string fieldXPath = field.XPath;

            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(fieldXPath))
            {
                SetDynamicValue(data, name, "");
                // Stop here: without this return the branches below would still run against the
                // incomplete field and either overwrite the default or fail on the empty XPath.
                return;
            }

            if (field.Type == ThinkCrawlFieldType.Single)
            {
                // Inherited fields are resolved relative to the current group node, others from root.
                var searchNode = field.Inherit ? parentNode : root;
                SetDynamicValue(data, name, GetHtmlNodeValue(searchNode, field));
            }
            else if (field.Type == ThinkCrawlFieldType.Group)
            {
                List <dynamic> childList = new List <dynamic>();
                var            nodes     = root.SelectNodes(fieldXPath);

                // The null check guards the no-match case; an empty collection simply yields no iterations.
                if (nodes != null)
                {
                    foreach (var node in nodes)
                    {
                        dynamic childData = new ExpandoObject();
                        foreach (var childField in field.Children)
                        {
                            // Each matched node becomes the parentNode for its child fields.
                            ExpandFieldData(childField, childData, root, node);
                        }
                        childList.Add(childData);
                    }
                }
                SetDynamicValue(data, name, childList);
            }
        }