public static ConfigSection ParseJsonString(string json)
        {
            var parsedJson = (JObject)JsonConvert.DeserializeObject(json);

            var rootSection = new ConfigSection();
            ParseSection(parsedJson, rootSection);
            return rootSection;
        }
        private static void ProcessConfigName(JObject parsedJson, ConfigSection currentConfig)
        {
            var configNameKeyName = "_configName";

            if (parsedJson[configNameKeyName] != null && parsedJson[configNameKeyName].Type == JTokenType.String)
            {
                currentConfig.ConfigName = parsedJson[configNameKeyName].ToString();
            }
        }
        public static ConfigSection Parse(string configPath)
        {
            var rawConfig = File.ReadAllText(configPath, Encoding.UTF8);
            var parsedJson = (JObject)JsonConvert.DeserializeObject(rawConfig);

            var rootSection = new ConfigSection();
            ParseSection(parsedJson, rootSection);
            return rootSection;
        }
        public StructuredDataExtractor(ConfigSection config)
        {
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            this.config = config;
            
            this.LoadTransformations();
        }
        private static void ProcessUrlPatterns(JObject parsedJson, ConfigSection currentConfig)
        {
            var urlPatternsKeyName = "_urlPatterns";

            if (parsedJson[urlPatternsKeyName] != null && parsedJson[urlPatternsKeyName].Type == JTokenType.Array)
            {
                foreach (var urlPattern in parsedJson[urlPatternsKeyName])
                {
                    currentConfig.UrlPatterns.Add(urlPattern.ToString());
                }
            }
        }
 private static void ParseSection(JObject parsedJson, ConfigSection currentConfig)
 {
     foreach (var item in parsedJson)
     {
         switch (item.Key)
         {
             case "_configName":
                 // Process the friendly internal name we have to this config
                 ProcessConfigName(parsedJson, currentConfig);
                 break;
             case "_urlPatterns":
                 // Process configuration on URL patterns that should be used to
                 // determine which HTML pages to process with the configuration
                 ProcessUrlPatterns(parsedJson, currentConfig);
                 break;
             case "_removeTags":
                 // Process the list of descendent tags to remove before extracting any data from this node
                 ProcessRemoveTags(parsedJson, currentConfig);
                 break;
             case "_xpath":
                 // Process configuration on XPath rules used to extract this block
                 // from HTML
                 ProcessXPathRules(parsedJson, currentConfig);
                 break;
             case "_transformations": // one or more transformations
                 // Process configuration on how to clean/modify the values extracted 
                 // from HTML
                 ProcessTransformations(parsedJson, currentConfig);
                 break;
             case "_transformation": // single transformation
                 // Process configuration on how to clean/modify the values extracted 
                 // from HTML
                 ProcessTransformation(parsedJson, currentConfig);
                 break;
             case "_forceArray":
                 // Force the value of this item to be an array (true or false)
                 ProcessForceArray(parsedJson, currentConfig);
                 break;
             default:
                 // We assume all other keys in the JSON at this level are for actual HTML 
                 // sections that need to be extracted, not configuration settings
                 ProcessChild(item.Key, item.Value, parsedJson, currentConfig);
                 break;
         }
     }
 }
        private static void ProcessChild(string childName, JToken rawChild, JObject parsedJson, ConfigSection currentConfig)
        {
            if (rawChild.Type == JTokenType.Object)
            {
                var childSection = new ConfigSection();
                ParseSection(rawChild.Value<JObject>(), childSection);
                currentConfig.Children[childName] = childSection;
            }
            else if (rawChild.Type == JTokenType.String)
            {
                var childSection = new ConfigSection();
                childSection.XPathRules.Add(rawChild.ToString());

                currentConfig.Children[childName] = childSection;
            }
        }
        private static void ProcessForceArray(JObject parsedJson, ConfigSection currentConfig)
        {
            var forceArrayKeyName = "_forceArray";

            if (parsedJson[forceArrayKeyName] != null)
            {
                if (parsedJson[forceArrayKeyName].Type == JTokenType.Boolean)
                {
                    currentConfig.ForceArray = ((JValue)parsedJson[forceArrayKeyName]).ToObject<bool>();
                }
            }
        }
        private static void ProcessTransformation(ConfigSection currentConfig, JToken transformation)
        {
            var transformationConfig = new TransformationConfig();

            foreach (var item in transformation)
            {
                if (item.Type == JTokenType.Property)
                {
                    var property = (JProperty)item;
                    var propertyName = property.Name.ToString();
                    var propertyValue = property.Value;

                    switch (propertyName)
                    {
                        case "_type":
                            transformationConfig.Type = propertyValue.ToString();
                            break;
                        default:
                            transformationConfig.ConfigAttributes[propertyName] = propertyValue;
                            break;
                    }
                }
            }

            if (!string.IsNullOrWhiteSpace(transformationConfig.Type))
            {
                currentConfig.Transformations.Add(transformationConfig);
            }
        }
        private object Extract(string name, ConfigSection config, HtmlAgilityPack.HtmlNode parentNode, List<HtmlAgilityPack.HtmlNode> logicalParents)
        {
            this.RemoveUnwantedTags(config, parentNode);

            // We will try to extract text for this item because it does not have children
            var containers = new JArray();

            if (config.XPathRules != null && config.XPathRules.Count > 0)
            {
                foreach (var xpath in config.XPathRules)
                {
                    // TODO: Add try catch Exception
                    var nodes = parentNode.SelectNodes(xpath);

                    if (nodes != null && nodes.Count > 0)
                    {
                        var newLogicalParents = logicalParents.GetRange(0, logicalParents.Count);
                        newLogicalParents.Add(parentNode);

                        foreach (var node in nodes)
                        {
                            if (config.Children != null && config.Children.Count > 0)
                            {
                                var container = new JObject();
                                this.ExtractChildren(config: config, parentNode: node, container: container, logicalParents: newLogicalParents);
                                containers.Add(container);
                            }
                            else if (config.Transformations != null && config.Transformations.Count > 0)
                            {
                                var obj = this.RunTransformations(config.Transformations, node, newLogicalParents);

                                if (obj != null)
                                {
                                    containers.Add(obj);
                                }
                            }
                            else if (node.InnerText != null)
                            {
                                containers.Add(HtmlEntity.DeEntitize(node.InnerText).Trim());
                            }
                        }
                    }
                }
            }
            else
            {
                var container = new JObject();
                this.ExtractChildren(config: config, parentNode: parentNode, container: container, logicalParents: logicalParents);
                containers.Add(container);
            }

            if (!config.ForceArray && containers.Count == 0)
            {
                return new JObject();
            }
            else if (!config.ForceArray && containers.Count == 1)
            {
                return containers.First;
            }
            else
            {
                return containers;
            }
        }
        private static void ProcessTransformations(JObject parsedJson, ConfigSection currentConfig)
        {
            var transformationsKeyName = "_transformations";

            if (parsedJson[transformationsKeyName] != null && parsedJson[transformationsKeyName].Type == JTokenType.Array)
            {
                var transformations = parsedJson[transformationsKeyName];

                foreach (var transformation in transformations)
                {
                    if (transformation.Type == JTokenType.Object)
                    {
                        ProcessTransformation(currentConfig, transformation);
                    }
                    else if (transformation.Type == JTokenType.String)
                    {
                        ProcessTransformation(currentConfig, TransformationConfigFromName(transformation.ToString()));
                    }
                }
            }
        }
        private static void ProcessXPathRules(JObject parsedJson, ConfigSection currentConfig)
        {
            var xPathKeyName = "_xpath";

            if (parsedJson[xPathKeyName] != null)
            {
                if (parsedJson[xPathKeyName].Type == JTokenType.Array)
                {
                    foreach (var xPath in parsedJson[xPathKeyName])
                    {
                        currentConfig.XPathRules.Add(xPath.ToString());
                    }
                }
                else if (parsedJson[xPathKeyName].Type == JTokenType.String)
                {
                    currentConfig.XPathRules.Add(parsedJson[xPathKeyName].ToString());
                }
            }
        }
        private void ExtractChildren(ConfigSection config, HtmlAgilityPack.HtmlNode parentNode, JObject container, List<HtmlAgilityPack.HtmlNode> logicalParents)
        {
            foreach (var child in config.Children)
            {
                var childName = child.Key;
                var childConfig = child.Value;

                var childObject = this.Extract(name: childName, config: childConfig, parentNode: parentNode, logicalParents: logicalParents);

                if (childObject is JObject)
                {
                    if (((JObject)childObject).Count > 0)
                    {
                        container[childName] = (JToken)childObject;
                    }
                }
                else if (childObject is JArray)
                {
                    if (((JArray)childObject).Count > 0)
                    {
                        container[childName] = (JToken)childObject;
                    }
                }
                else
                {
                    container[childName] = (JToken)childObject;
                }
            }
        }
 private void RemoveUnwantedTags(ConfigSection config, HtmlAgilityPack.HtmlNode parentNode)
 {
     if (parentNode != null && config != null && config.RemoveTags != null && config.RemoveTags.Count > 0)
     {
         parentNode.Descendants()
                 .Where(n => config.RemoveTags.Contains(n.Name.ToLowerInvariant()))
                 .ToList()
                 .ForEach(n => n.Remove());
     }
 }
        private static void ProcessRemoveTags(JObject parsedJson, ConfigSection currentConfig)
        {
            var removeTagsKeyName = "_removeTags";

            if (parsedJson[removeTagsKeyName] != null && parsedJson[removeTagsKeyName].Type == JTokenType.Array)
            {
                foreach (var removeTagPattern in parsedJson[removeTagsKeyName])
                {
                    currentConfig.RemoveTags.Add(removeTagPattern.ToString().ToLowerInvariant());
                }
            }
        }
        private static void ProcessChild(string childName, JToken rawChild, JObject parsedJson, ConfigSection currentConfig)
        {
            if (rawChild.Type == JTokenType.Object)
            {
                var childSection = new ConfigSection();
                ParseSection(rawChild.Value <JObject>(), childSection);
                currentConfig.Children[childName] = childSection;
            }
            else if (rawChild.Type == JTokenType.String)
            {
                var childSection = new ConfigSection();
                childSection.XPathRules.Add(rawChild.ToString());

                currentConfig.Children[childName] = childSection;
            }
        }
        private static void ProcessTransformation(JObject parsedJson, ConfigSection currentConfig)
        {
            var transformationKeyName = "_transformation";

            if (parsedJson[transformationKeyName] != null && parsedJson[transformationKeyName].Type == JTokenType.String)
            {
                ProcessTransformation(currentConfig, TransformationConfigFromName(parsedJson[transformationKeyName].ToString()));
            }
        }