Example #1
0
        // Inspects the HTML tokens of the given node and either flags the node through SetEncoded()
        // or rewrites every HTML token's content with its entity-decoded text.
        //
        // Only children that are IntermediateToken instances with IsHtml set and non-empty Content
        // take part; every other child is ignored in all three passes below.
        //
        // The node is flagged (and no token is modified) when:
        //   - any participating token contains a character above 127 or one found in EncodedCharacters, or
        //   - TryDecodeHtmlEntities reports failure for any participating token.
        public override void VisitHtml(HtmlContentIntermediateNode node)
        {
            // The child collection is not added to or removed from in this method, so read the count once.
            var childCount = node.Children.Count;

            // Pass 1: bail out as soon as a single disqualifying character is found.
            for (var index = 0; index < childCount; index++)
            {
                if (node.Children[index] is IntermediateToken htmlToken &&
                    htmlToken.IsHtml &&
                    !string.IsNullOrEmpty(htmlToken.Content))
                {
                    foreach (var character in htmlToken.Content)
                    {
                        // 127 is the upper bound of the ASCII range (0 - 127).
                        if (character > 127 || EncodedCharacters.Contains(character))
                        {
                            node.SetEncoded();
                            return;
                        }
                    }
                }
            }

            // Pass 2: every participating token is ASCII-only and free of EncodedCharacters.
            // Decode into a side buffer first so that no token is touched unless all of them decode.
            // Slots that belong to ignored children stay null and are never read back.
            var decodedByIndex = new string[childCount];

            for (var index = 0; index < childCount; index++)
            {
                if (node.Children[index] is IntermediateToken htmlToken &&
                    htmlToken.IsHtml &&
                    !string.IsNullOrEmpty(htmlToken.Content))
                {
                    if (!TryDecodeHtmlEntities(htmlToken.Content, out var decodedText))
                    {
                        node.SetEncoded();
                        return;
                    }

                    decodedByIndex[index] = decodedText;
                }
            }

            // Pass 3: all decoding succeeded, so commit the decoded text to the tokens.
            for (var index = 0; index < childCount; index++)
            {
                if (node.Children[index] is IntermediateToken htmlToken &&
                    htmlToken.IsHtml &&
                    !string.IsNullOrEmpty(htmlToken.Content))
                {
                    htmlToken.Content = decodedByIndex[index];
                }
            }
        }