Ejemplo n.º 1
0
        /// <summary>
        /// Visits a comment node by handing its <c>Data</c> to <c>writer.WriteComment</c>.
        /// </summary>
        /// <param name="comment">The comment node to emit; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="comment"/> is null.
        /// </exception>
        protected override void VisitComment(DomComment comment)
        {
            // Reject a missing node up front so the writer is never touched with bad input.
            if (comment == null)
            {
                throw new ArgumentNullException("comment");
            }

            writer.WriteComment(comment.Data);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Visits a comment node by appending it to <c>sb</c> wrapped in the
        /// <c>&lt;!--</c> / <c>--&gt;</c> comment delimiters.
        /// </summary>
        /// <param name="comment">The comment node to emit; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="comment"/> is null.
        /// </exception>
        protected override void VisitComment(DomComment comment)
        {
            if (comment == null)
            {
                throw new ArgumentNullException("comment");
            }

            // Opening delimiter, raw comment data, closing delimiter -- in that order.
            sb.Append("<!--").Append(comment.Data).Append("-->");
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Visits a comment node by appending its <c>Text</c> to <c>_output</c>, wrapped in the
 /// <c>&lt;!--</c> / <c>--&gt;</c> comment delimiters.
 /// </summary>
 /// <param name="node">The comment node to emit. It is dereferenced without a null check.</param>
 protected override void VisitComment(DomComment node)
 {
     // Indentation is only emitted when pretty printing is switched on.
     if (PrettyPrint)
     {
         Indent();
     }

     _output.Append("<!--").Append(node.Text).Append("-->");
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Writes a comment node to <c>_writer</c> as <c>&lt;!--</c>, the node's <c>Text</c>,
        /// then <c>--&gt;</c>. A null node is silently skipped.
        /// </summary>
        /// <param name="comment">The comment node to write; may be null, in which case nothing is written.</param>
        protected override void WriteComment(DomComment comment)
        {
            // Nothing to emit for a missing node.
            if (comment == null)
            {
                return;
            }

            // Indentation is only emitted when the writer settings ask for pretty printing.
            if (WriterSettings.PrettyPrint)
            {
                Indent();
            }

            // Three separate writes: opening delimiter, comment text, closing delimiter.
            _writer.Write("<!--");
            _writer.Write(comment.Text);
            _writer.Write("-->");
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Visits a comment node and deliberately does nothing, so comments produce no output
 /// from this visitor.
 /// </summary>
 /// <param name="comment">The comment node being visited; it is never read.</param>
 protected override void VisitComment(DomComment comment)
 {
     // Intentionally empty: comment nodes are ignored by this override.
 }
        /// <summary>
        /// Scans <c>BaseHtml</c> from position 0 through <c>EndPos</c> and lazily yields the root-level
        /// DOM objects it builds: text literals, elements and special nodes (doctype, CDATA, comments).
        /// Nesting is tracked with an explicit stack of <c>IterationData</c> items instead of recursion.
        /// </summary>
        /// <param name="allowLiterals">Copied to <c>AllowLiterals</c> on each root-level iteration item;
        /// child items always get <c>AllowLiterals = true</c>. How the flag is consumed is decided by
        /// helpers not shown here (presumably <c>GetLiteral</c> -- TODO confirm).</param>
        /// <returns>A deferred sequence of nodes. Elements and special nodes that have a parent are
        /// appended to that parent's element instead of being yielded; a parent element is yielded
        /// once it is closed and has no parent of its own.</returns>
        protected IEnumerable<IDomObject> Parse(bool allowLiterals)
        {
            int pos=0;
            Stack<IterationData> stack = new Stack<IterationData>();

            // Each pass of the outer loop starts a fresh root-level item at the position where
            // the previous one stopped.
            while (pos <= EndPos)
            {
                IterationData current = new IterationData();
                current.AllowLiterals = allowLiterals;
                current.Reset(pos);
                stack.Push(current);

                while (stack.Count != 0)
                {

                    current = stack.Pop();
                    //Debug.Assert(current.Object == null);

                    while (!current.Finished && current.Pos <= EndPos)
                    {
                        // NOTE(review): 'c' is never read anywhere in this method -- looks unused.
                        char c = BaseHtml[current.Pos];
                        switch (current.Step)
                        {
                            // Step 0: advance to the next '<' (or to the end of the input).
                            case 0:
                                current.Pos = CharIndexOf(BaseHtml, '<', current.Pos);
                                if (current.Pos  < 0)
                                {
                                    // done - no new tags found; moving past EndPos ends the scanning loop
                                    current.Pos = EndPos + 1;
                                }
                                else {
                                    // deal with when we're in a literal block (script/textarea)
                                    if (current.ReadTextOnly)
                                    {
                                        int endPos = current.Pos;
                                        while (endPos >= 0)
                                        {
                                            // keep going until we find the closing tag for this element
                                            int caretPos = CharIndexOf(BaseHtml, '>', endPos + 1);
                                            if (caretPos > 0)
                                            {
                                                // text between '<' and '>' compared against "/" + parent node name
                                                string tag = BaseHtml.SubstringBetween(endPos + 1, caretPos).Trim().ToLower();
                                                if (tag == "/" +current.Parent.Element.NodeName)
                                                {
                                                    // this is the end tag -- exit the block
                                                    current.Pos=endPos;
                                                    break;
                                                }
                                            }
                                            // not the closer: try the next '<'
                                            endPos = CharIndexOf(BaseHtml, '<', endPos + 1);
                                        }
                                    }
                                    // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                                    current.Step=1;
                                }
                                break;
                            // Step 1: positioned on a '<'. Flush pending text first, then interpret the tag.
                            case 1:
                                if (current.Pos > current.HtmlStart)
                                {
                                    // There is text between HtmlStart and this tag; emit it as a literal.
                                    IDomObject literal = GetLiteral(current);
                                    if (literal != null)
                                    {
                                        yield return literal;
                                    }

                                    // Re-enter the loop; presumably GetLiteral moved HtmlStart up to Pos
                                    // so the next pass skips this branch -- TODO confirm.
                                    continue;
                                }

                                int tagStartPos = current.Pos;
                                string newTag;

                                // An empty result is treated below as "this is a closing tag".
                                newTag = GetTagOpener(current);

                                string newTagLower = newTag.ToLower();

                                // when Element exists, it's because a previous iteration created it: it's our parent
                                string parentTag = String.Empty;
                                if (current.Parent != null)
                                {
                                    parentTag = current.Parent.Element.NodeName.ToLower();
                                }

                                if (newTag == String.Empty)
                                {
                                    // It's a tag closer. Make sure it's the right one.
                                    current.Pos = tagStartPos + 1;
                                    string closeTag = GetCloseTag(current);

                                    // Ignore empty tags, or closing tags found when no parent is open
                                    bool isProperClose = closeTag.ToLower() == parentTag;
                                    if (closeTag == String.Empty)
                                    {
                                        // ignore empty tags
                                        continue;
                                    }
                                    else
                                    {
                                        // locate match for this closer up the hierarchy
                                        IterationData actualParent =null;

                                        if (!isProperClose)
                                        {
                                            actualParent = current.Parent;
                                            while (actualParent != null && actualParent.Element.NodeName.ToLower() != closeTag.ToLower())
                                            {
                                                actualParent = actualParent.Parent;
                                            }
                                        }
                                        // if no matching close tag was found up the tree, ignore it
                                        // otherwise always close this and repeat at the same position until the match is found
                                        if (!isProperClose && actualParent == null)
                                        {
                                            current.Invalid = true;
                                            continue;
                                        }
                                    }
                                    // element is closed
                                    // (current.Parent is non-null here: with no parent, parentTag is empty, so a
                                    // non-empty closer is never "proper" and no ancestor matches -- handled above)

                                    if (current.Parent.Parent == null)
                                    {
                                        yield return current.Parent.Element;
                                    }
                                    current.Finished = true;
                                    if (isProperClose)
                                    {
                                        // matching closer: the parent resumes at the current position
                                        current.Parent.Reset(current.Pos);
                                    }
                                    else
                                    {
                                        // closer belongs to an ancestor: the parent resumes at the same
                                        // closing tag so it gets processed again one level up
                                        current.Parent.Reset(tagStartPos);
                                    }
                                    // already been returned before we added the children
                                    continue;
                                }
                                // Before we keep going see if this is an implicit close
                                if (parentTag != String.Empty)
                                {
                                    // NOTE(review): parentTag is lower-cased but newTag is not, so the
                                    // equality check is case-sensitive on newTag -- confirm intended.
                                    if (TagHasImplicitClose(parentTag,newTag)
                                        && parentTag == newTag)
                                    {
                                        // same tag for a repeater like li occurred - treat like a close tag
                                        if (current.Parent.Parent == null)
                                        {
                                            yield return current.Parent.Element;
                                        }
                                        // the parent resumes at this opening tag so it is parsed as a sibling
                                        current.Parent.Reset(tagStartPos);
                                        current.Finished = true;

                                        continue;
                                    }
                                }
                                // seems to be a new tag. Parse it

                                IDomSpecialElement specialElement = null;

                                if (newTagLower[0] == '!')
                                {
                                    if (newTagLower.StartsWith("!doctype"))
                                    {
                                        specialElement = new DomDocumentType();
                                        current.Object = specialElement;
                                    }
                                    else if (newTagLower.StartsWith("![cdata["))
                                    {
                                        specialElement = new DomCData();
                                        current.Object = specialElement;
                                        // skip the 9 characters of "<![CDATA["
                                        current.Pos = tagStartPos + 9;
                                    }
                                    else
                                    {
                                        // any other "<!..." construct is treated as a comment
                                        specialElement = new DomComment();
                                        current.Object = specialElement;
                                        if (newTagLower.StartsWith("!--"))
                                        {
                                            ((DomComment)specialElement).IsQuoted = true;
                                            // skip the 4 characters of "<!--"
                                            current.Pos = tagStartPos + 4;
                                        } else {
                                            // skip only the '<'
                                            current.Pos = tagStartPos+1;
                                        }
                                    }
                                }
                                else
                                {
                                    current.Object = new DomElement(newTag);

                                    // Elements that allow inner text but not inner HTML are read as raw text;
                                    // ReadTextOnly is copied to the child item created below.
                                    if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                                    {
                                        current.ReadTextOnly = true;
                                        current.Step = 0;
                                    }
                                }

                                // Handle non-element/text types -- they have data inside the tag construct

                                if (current.Object is IDomSpecialElement)
                                {
                                    // quoted comments end with "-->", every other special node with ">"
                                    string endTag = (current.Object is IDomComment && ((IDomComment)current.Object).IsQuoted) ? "-->" : ">";

                                    int tagEndPos = BaseHtml.Seek(endTag, current.Pos);
                                    if (tagEndPos <0)
                                    {
                                        // if a tag is unclosed entirely, then just find a new line.
                                        tagEndPos = BaseHtml.Seek(System.Environment.NewLine, current.Pos);
                                    }
                                    if (tagEndPos < 0)
                                    {
                                        // Never closed, no newline - junk, treat it like such
                                        tagEndPos = EndPos;
                                    }

                                    specialElement.NonAttributeData = BaseHtml.SubstringBetween(current.Pos, tagEndPos);
                                    current.Pos = tagEndPos;
                                }
                                else
                                {
                                    // Parse attribute data, one attribute per call, until GetTagAttribute returns false
                                    while (current.Pos <= EndPos)
                                    {
                                        if (!GetTagAttribute(current)) break;
                                    }
                                }

                                // The boolean result is used as "this node has children to parse".
                                bool hasChildren = MoveOutsideTag(current);

                                // tricky part: if there are children, push ourselves back on the stack and start with a new object
                                // from this position. The children will add themselves as they are created, avoiding recursion.
                                // When the close tag is found, the parent will be yielded if it's a root element.
                                // I think there's a slightly better way to do this, capturing all the yield logic at the end of the
                                // stack but it works for now.

                                if (current.Parent != null)
                                {
                                    current.Parent.Element.AppendChild(current.Object);
                                } else if (!hasChildren) {
                                    // childless root node: nothing more will be added to it, so yield it now
                                    yield return current.Object;
                                }

                                if (!hasChildren)
                                {
                                    // reuse this item for whatever follows the childless node
                                    current.Reset();
                                    continue;
                                }

                                stack.Push(current);

                                // Continue scanning with a child item whose parent is the node just pushed.
                                IterationData subItem = new IterationData();
                                subItem.Parent = current;
                                subItem.AllowLiterals = true;
                                subItem.Reset(current.Pos);
                                subItem.ReadTextOnly = current.ReadTextOnly;
                                current = subItem;
                                break;

                        }
                    }
                    // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because a top-level tag was unclosed.
                    // This will wrap up any straggling text and close any open tags after it.
                    if (!current.Finished)
                    {
                        if (current.Pos > current.HtmlStart)
                        {
                            IDomObject literal = GetLiteral(current);
                            if (literal != null)
                            {
                                yield return literal;
                            }
                        }

                        if (current.Parent != null)
                        {
                            if (current.Parent.Parent == null)
                            {
                                yield return current.Parent.Element;
                            }
                            current.Parent.Reset(current.Pos);
                            current.Finished = true;
                        }

                    }
                }
                // resume the outer loop from wherever the last processed item stopped
                pos = current.Pos;
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Scans <c>BaseHtml</c> from position 0 through <c>EndPos</c> and lazily yields the root-level
        /// DOM objects it builds: text literals, elements and special nodes (doctype, CDATA, comments).
        /// Nesting is tracked with an explicit stack of <c>IterationData</c> items instead of recursion.
        /// </summary>
        /// <param name="allowLiterals">Copied to <c>AllowLiterals</c> on each root-level iteration item;
        /// child items always get <c>AllowLiterals = true</c>. How the flag is consumed is decided by
        /// helpers not shown here (presumably <c>GetLiteral</c> -- TODO confirm).</param>
        /// <returns>A deferred sequence of nodes. Elements and special nodes that have a parent are
        /// appended to that parent's element instead of being yielded; a parent element is yielded
        /// once it is closed and has no parent of its own.</returns>
        protected IEnumerable <IDomObject> Parse(bool allowLiterals)
        {
            int pos = 0;
            Stack <IterationData> stack = new Stack <IterationData>();

            // Each pass of the outer loop starts a fresh root-level item at the position where
            // the previous one stopped.
            while (pos <= EndPos)
            {
                IterationData current = new IterationData();
                current.AllowLiterals = allowLiterals;
                current.Reset(pos);
                stack.Push(current);

                while (stack.Count != 0)
                {
                    current = stack.Pop();
                    //Debug.Assert(current.Object == null);

                    while (!current.Finished && current.Pos <= EndPos)
                    {
                        // NOTE(review): 'c' is never read anywhere in this method -- looks unused.
                        char c = BaseHtml[current.Pos];
                        switch (current.Step)
                        {
                        // Step 0: advance to the next '<' (or to the end of the input).
                        case 0:
                            current.Pos = CharIndexOf(BaseHtml, '<', current.Pos);
                            if (current.Pos < 0)
                            {
                                // done - no new tags found; moving past EndPos ends the scanning loop
                                current.Pos = EndPos + 1;
                            }
                            else
                            {
                                // deal with when we're in a literal block (script/textarea)
                                if (current.ReadTextOnly)
                                {
                                    int endPos = current.Pos;
                                    while (endPos >= 0)
                                    {
                                        // keep going until we find the closing tag for this element
                                        int caretPos = CharIndexOf(BaseHtml, '>', endPos + 1);
                                        if (caretPos > 0)
                                        {
                                            // text between '<' and '>' compared against "/" + parent node name
                                            string tag = BaseHtml.SubstringBetween(endPos + 1, caretPos).Trim().ToLower();
                                            if (tag == "/" + current.Parent.Element.NodeName)
                                            {
                                                // this is the end tag -- exit the block
                                                current.Pos = endPos;
                                                break;
                                            }
                                        }
                                        // not the closer: try the next '<'
                                        endPos = CharIndexOf(BaseHtml, '<', endPos + 1);
                                    }
                                }
                                // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                                current.Step = 1;
                            }
                            break;

                        // Step 1: positioned on a '<'. Flush pending text first, then interpret the tag.
                        case 1:
                            if (current.Pos > current.HtmlStart)
                            {
                                // There is text between HtmlStart and this tag; emit it as a literal.
                                IDomObject literal = GetLiteral(current);
                                if (literal != null)
                                {
                                    yield return(literal);
                                }

                                // Re-enter the loop; presumably GetLiteral moved HtmlStart up to Pos
                                // so the next pass skips this branch -- TODO confirm.
                                continue;
                            }

                            int    tagStartPos = current.Pos;
                            string newTag;

                            // An empty result is treated below as "this is a closing tag".
                            newTag = GetTagOpener(current);

                            string newTagLower = newTag.ToLower();

                            // when Element exists, it's because a previous iteration created it: it's our parent
                            string parentTag = String.Empty;
                            if (current.Parent != null)
                            {
                                parentTag = current.Parent.Element.NodeName.ToLower();
                            }

                            if (newTag == String.Empty)
                            {
                                // It's a tag closer. Make sure it's the right one.
                                current.Pos = tagStartPos + 1;
                                string closeTag = GetCloseTag(current);

                                // Ignore empty tags, or closing tags found when no parent is open
                                bool isProperClose = closeTag.ToLower() == parentTag;
                                if (closeTag == String.Empty)
                                {
                                    // ignore empty tags
                                    continue;
                                }
                                else
                                {
                                    // locate match for this closer up the hierarchy
                                    IterationData actualParent = null;

                                    if (!isProperClose)
                                    {
                                        actualParent = current.Parent;
                                        while (actualParent != null && actualParent.Element.NodeName.ToLower() != closeTag.ToLower())
                                        {
                                            actualParent = actualParent.Parent;
                                        }
                                    }
                                    // if no matching close tag was found up the tree, ignore it
                                    // otherwise always close this and repeat at the same position until the match is found
                                    if (!isProperClose && actualParent == null)
                                    {
                                        current.Invalid = true;
                                        continue;
                                    }
                                }
                                // element is closed
                                // (current.Parent is non-null here: with no parent, parentTag is empty, so a
                                // non-empty closer is never "proper" and no ancestor matches -- handled above)

                                if (current.Parent.Parent == null)
                                {
                                    yield return(current.Parent.Element);
                                }
                                current.Finished = true;
                                if (isProperClose)
                                {
                                    // matching closer: the parent resumes at the current position
                                    current.Parent.Reset(current.Pos);
                                }
                                else
                                {
                                    // closer belongs to an ancestor: the parent resumes at the same
                                    // closing tag so it gets processed again one level up
                                    current.Parent.Reset(tagStartPos);
                                }
                                // already been returned before we added the children
                                continue;
                            }
                            // Before we keep going see if this is an implicit close
                            if (parentTag != String.Empty)
                            {
                                // NOTE(review): parentTag is lower-cased but newTag is not, so the
                                // equality check is case-sensitive on newTag -- confirm intended.
                                if (TagHasImplicitClose(parentTag, newTag) &&
                                    parentTag == newTag)
                                {
                                    // same tag for a repeater like li occurred - treat like a close tag
                                    if (current.Parent.Parent == null)
                                    {
                                        yield return(current.Parent.Element);
                                    }
                                    // the parent resumes at this opening tag so it is parsed as a sibling
                                    current.Parent.Reset(tagStartPos);
                                    current.Finished = true;

                                    continue;
                                }
                            }
                            // seems to be a new tag. Parse it

                            IDomSpecialElement specialElement = null;

                            if (newTagLower[0] == '!')
                            {
                                if (newTagLower.StartsWith("!doctype"))
                                {
                                    specialElement = new DomDocumentType();
                                    current.Object = specialElement;
                                }
                                else if (newTagLower.StartsWith("![cdata["))
                                {
                                    specialElement = new DomCData();
                                    current.Object = specialElement;
                                    // skip the 9 characters of "<![CDATA["
                                    current.Pos    = tagStartPos + 9;
                                }
                                else
                                {
                                    // any other "<!..." construct is treated as a comment
                                    specialElement = new DomComment();
                                    current.Object = specialElement;
                                    if (newTagLower.StartsWith("!--"))
                                    {
                                        ((DomComment)specialElement).IsQuoted = true;
                                        // skip the 4 characters of "<!--"
                                        current.Pos = tagStartPos + 4;
                                    }
                                    else
                                    {
                                        // skip only the '<'
                                        current.Pos = tagStartPos + 1;
                                    }
                                }
                            }
                            else
                            {
                                current.Object = new DomElement(newTag);

                                // Elements that allow inner text but not inner HTML are read as raw text;
                                // ReadTextOnly is copied to the child item created below.
                                if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                                {
                                    current.ReadTextOnly = true;
                                    current.Step         = 0;
                                }
                            }

                            // Handle non-element/text types -- they have data inside the tag construct

                            if (current.Object is IDomSpecialElement)
                            {
                                // quoted comments end with "-->", every other special node with ">"
                                string endTag = (current.Object is IDomComment && ((IDomComment)current.Object).IsQuoted) ? "-->" : ">";

                                int tagEndPos = BaseHtml.Seek(endTag, current.Pos);
                                if (tagEndPos < 0)
                                {
                                    // if a tag is unclosed entirely, then just find a new line.
                                    tagEndPos = BaseHtml.Seek(System.Environment.NewLine, current.Pos);
                                }
                                if (tagEndPos < 0)
                                {
                                    // Never closed, no newline - junk, treat it like such
                                    tagEndPos = EndPos;
                                }

                                specialElement.NonAttributeData = BaseHtml.SubstringBetween(current.Pos, tagEndPos);
                                current.Pos = tagEndPos;
                            }
                            else
                            {
                                // Parse attribute data, one attribute per call, until GetTagAttribute returns false
                                while (current.Pos <= EndPos)
                                {
                                    if (!GetTagAttribute(current))
                                    {
                                        break;
                                    }
                                }
                            }

                            // The boolean result is used as "this node has children to parse".
                            bool hasChildren = MoveOutsideTag(current);

                            // tricky part: if there are children, push ourselves back on the stack and start with a new object
                            // from this position. The children will add themselves as they are created, avoiding recursion.
                            // When the close tag is found, the parent will be yielded if it's a root element.
                            // I think there's a slightly better way to do this, capturing all the yield logic at the end of the
                            // stack but it works for now.

                            if (current.Parent != null)
                            {
                                current.Parent.Element.AppendChild(current.Object);
                            }
                            else if (!hasChildren)
                            {
                                // childless root node: nothing more will be added to it, so yield it now
                                yield return(current.Object);
                            }

                            if (!hasChildren)
                            {
                                // reuse this item for whatever follows the childless node
                                current.Reset();
                                continue;
                            }

                            stack.Push(current);

                            // Continue scanning with a child item whose parent is the node just pushed.
                            IterationData subItem = new IterationData();
                            subItem.Parent        = current;
                            subItem.AllowLiterals = true;
                            subItem.Reset(current.Pos);
                            subItem.ReadTextOnly = current.ReadTextOnly;
                            current = subItem;
                            break;
                        }
                    }
                    // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because a top-level tag was unclosed.
                    // This will wrap up any straggling text and close any open tags after it.
                    if (!current.Finished)
                    {
                        if (current.Pos > current.HtmlStart)
                        {
                            IDomObject literal = GetLiteral(current);
                            if (literal != null)
                            {
                                yield return(literal);
                            }
                        }

                        if (current.Parent != null)
                        {
                            if (current.Parent.Parent == null)
                            {
                                yield return(current.Parent.Element);
                            }
                            current.Parent.Reset(current.Pos);
                            current.Finished = true;
                        }
                    }
                }
                // resume the outer loop from wherever the last processed item stopped
                pos = current.Pos;
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parse the HTML, and return it, based on options set.
        /// </summary>
        ///
        /// <remarks>
        /// Walks <c>Html</c> from position 0 through <c>EndPos</c> using an explicit stack of
        /// <c>IterationData</c> carriers rather than recursion. This is an iterator method, so no
        /// parsing happens until the returned sequence is enumerated. The options consulted here
        /// are <c>WrapRootTextNodes</c>, <c>IsDocument</c> and <c>GenerateOptionalElements</c>.
        /// </remarks>
        ///
        /// <returns>
        /// An enumerator of the top-level elements.
        /// </returns>

        protected IEnumerable<IDomObject> ParseImplementation()
        {
            int pos = 0;
            Stack<IterationData> stack = new Stack<IterationData>();

            while (pos <= EndPos)
            {
                // Every pass of the outer loop starts a fresh carrier at the current position.
                IterationData current = new IterationData();
                if (WrapRootTextNodes)
                {
                    current.WrapLiterals = true;
                }

                current.Reset(pos);
                stack.Push(current);

                while (stack.Count != 0)
                {
                    current = stack.Pop();

                    while (current.TokenizerState != TokenizerState.Finished && current.Pos <= EndPos)
                    {
                        switch (current.TokenizerState)
                        {
                        case TokenizerState.Default:
                            if (current.FindNextTag(Html))
                            {
                                // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                                current.TokenizerState = TokenizerState.TagStart;
                            }
                            break;

                        case TokenizerState.TagStart:
                            // Yield the literal reported by the carrier, if there is one.
                            IDomObject literal;
                            if (current.TryGetLiteral(this, out literal))
                            {
                                yield return literal;
                            }

                            // Remembered so a mismatched closer or an implicit close can rewind to this tag.
                            int tagStartPos = current.Pos;

                            string newTag = current.GetTagOpener(Html);

                            if (newTag == String.Empty)
                            {
                                // It's a tag closer. Make sure it's the right one.
                                current.Pos = tagStartPos + 1;
                                ushort closeTagId = HtmlData.Tokenize(current.GetCloseTag(Html));

                                // Ignore empty tags, or closing tags found when no parent is open
                                bool isProperClose = closeTagId == current.ParentTagID();
                                if (closeTagId == 0)
                                {
                                    // ignore empty tags
                                    continue;
                                }
                                else
                                {
                                    // locate match for this closer up the hierarchy
                                    IterationData actualParent = null;

                                    if (!isProperClose)
                                    {
                                        actualParent = current.Parent;
                                        while (actualParent != null && actualParent.Element.NodeNameID != closeTagId)
                                        {
                                            actualParent = actualParent.Parent;
                                        }
                                    }
                                    // if no matching close tag was found up the tree, ignore it
                                    // otherwise always close this and repeat at the same position until the match is found
                                    if (!isProperClose && actualParent == null)
                                    {
                                        current.InsertionMode = InsertionMode.Invalid;
                                        continue;
                                    }
                                }
                                // element is closed

                                // Only an element whose carrier has no parent of its own is yielded here.
                                if (current.Parent.Parent == null)
                                {
                                    yield return current.Parent.Element;
                                }
                                current.TokenizerState = TokenizerState.Finished;
                                if (isProperClose)
                                {
                                    current.Parent.Reset(current.Pos);
                                }
                                else
                                {
                                    // Mismatched closer: rewind the parent to the start of this closer
                                    // so the same closer is examined again one level up.
                                    current.Parent.Reset(tagStartPos);
                                }
                                // already been returned before we added the children
                                continue;
                            }
                            else if (newTag[0] == '!')
                            {
                                // Ordinal comparisons: the position offsets below (+9, +4) assume the
                                // prefix matched character for character, which culture-sensitive
                                // comparison does not guarantee.
                                IDomSpecialElement specialElement = null;
                                if (newTag.StartsWith("!DOCTYPE", StringComparison.OrdinalIgnoreCase))
                                {
                                    specialElement  = new DomDocumentType();
                                    current.Element = specialElement;
                                }
                                else if (newTag.StartsWith("![CDATA[", StringComparison.OrdinalIgnoreCase))
                                {
                                    specialElement  = new DomCData();
                                    current.Element = specialElement;
                                    // skip past "<![CDATA[" (9 characters)
                                    current.Pos     = tagStartPos + 9;
                                }
                                else
                                {
                                    specialElement  = new DomComment();
                                    current.Element = specialElement;
                                    if (newTag.StartsWith("!--", StringComparison.Ordinal))
                                    {
                                        ((DomComment)specialElement).IsQuoted = true;
                                        // skip past "<!--" (4 characters)
                                        current.Pos = tagStartPos + 4;
                                    }
                                    else
                                    {
                                        current.Pos = tagStartPos + 1;
                                    }
                                }

                                // Quoted comments end at "-->"; everything else ends at the next ">".
                                string endTag = (current.Element is IDomComment && ((IDomComment)current.Element).IsQuoted) ? "-->" : ">";

                                int tagEndPos = Html.Seek(endTag, current.Pos);
                                if (tagEndPos < 0)
                                {
                                    // if a tag is unclosed entirely, then just find a new line.
                                    // NOTE(review): Environment.NewLine is platform-specific, so input using a
                                    // different line ending will not match here -- confirm this is intended.
                                    tagEndPos = Html.Seek(System.Environment.NewLine, current.Pos);
                                }
                                if (tagEndPos < 0)
                                {
                                    // Never closed, no newline - junk, treat it like such
                                    tagEndPos = EndPos;
                                }

                                specialElement.NonAttributeData = Html.SubstringBetween(current.Pos, tagEndPos);
                                current.Pos = tagEndPos;
                            }
                            else
                            {
                                // seems to be a new element tag, parse it.

                                ushort newTagId = HtmlData.Tokenize(newTag);

                                // Before we keep going see if this is an implicit close
                                ushort parentTagId = current.ParentTagID();

                                int lastPos = current.Pos;

                                if (parentTagId == 0 && IsDocument)
                                {
                                    // A document whose first tag is not <html> gets an html element created for it.
                                    if (newTagId != HtmlData.tagHTML)
                                    {
                                        current.Element = DomElement.Create(HtmlData.tagHTML);
                                        current         = current.AddNewChild();
                                        parentTagId     = HtmlData.tagHTML;
                                    }
                                }

                                if (parentTagId != 0)
                                {
                                    ushort action = SpecialTagActionDelegate(parentTagId, newTagId);

                                    while (action != HtmlData.tagActionNothing)
                                    {
                                        if (action == HtmlData.tagActionClose)
                                        {
                                            // track the next parent up the chain
                                            var newNode = current.Parent;

                                            // same tag for a repeater like li occurred - treat like a close tag

                                            if (current.Parent.Parent == null)
                                            {
                                                yield return current.Parent.Element;
                                            }

                                            // The parent is reset once, after this loop, for whichever carrier ends up current.
                                            current.TokenizerState = TokenizerState.Finished;

                                            if (newNode != null && newNode.Parent != null && newNode.Parent.Element != null)
                                            {
                                                // Ask again against the grandparent: the new tag may close more than one level.
                                                action = SpecialTagActionDelegate(newNode.Parent.Element.NodeNameID, newTagId);
                                                if (action != HtmlData.tagActionNothing)
                                                {
                                                    current = newNode;
                                                }
                                            }
                                            else
                                            {
                                                action = HtmlData.tagActionNothing;
                                            }
                                        }
                                        else
                                        {
                                            // Any other action value is passed to AddNewParent, but only when
                                            // optional-element generation is enabled.
                                            if (GenerateOptionalElements)
                                            {
                                                stack.Push(current);
                                                current = current.AddNewParent(action, lastPos);
                                            }
                                            action = HtmlData.tagActionNothing;
                                        }
                                    }
                                    if (current.TokenizerState == TokenizerState.Finished)
                                    {
                                        // Implicitly closed: rewind the parent so this opening tag is parsed again there.
                                        current.Parent.Reset(tagStartPos);
                                        continue;
                                    }
                                }


                                current.Element = DomElement.Create(newTagId);


                                if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                                {
                                    current.InsertionMode  = InsertionMode.Text;
                                    current.TokenizerState = TokenizerState.Default;
                                }

                                // Parse attribute data
                                while (current.Pos <= EndPos)
                                {
                                    if (!current.GetTagAttribute(Html))
                                    {
                                        break;
                                    }
                                }
                            }

                            IDomObject el;

                            // When FinishTagOpener returns true, keep this carrier on the stack and
                            // continue with a new child carrier.
                            if (current.FinishTagOpener(Html, out el))
                            {
                                stack.Push(current);
                                current = current.AddNewChild();
                            }

                            if (el != null)
                            {
                                yield return el;
                            }

                            break;
                        }
                    }


                    // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because a top-level tag was unclosed.
                    // This will wrap up any straggling text and close any open tags after it.

                    if (current.TokenizerState != TokenizerState.Finished)
                    {
                        foreach (var closed in current.CloseElement(this))
                        {
                            yield return closed;
                        }
                    }
                }
                pos = current.Pos;
            }
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Explicit <c>IDomNodeVisitor</c> entry point for comment nodes; forwards to
 /// <c>VisitComment</c>.
 /// </summary>
 /// <param name="comment">The comment node being visited.</param>
 void IDomNodeVisitor.Visit(DomComment comment) => VisitComment(comment);
Ejemplo n.º 10
0
        /// <summary>
        /// Visits a comment node by handing it to <c>DefaultVisit</c>.
        /// </summary>
        /// <param name="comment">The comment node to visit.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="comment"/> is <c>null</c>.
        /// </exception>
        protected virtual void VisitComment(DomComment comment)
        {
            // Reject a missing node before it reaches the default handler.
            if (comment == null)
            {
                throw new ArgumentNullException("comment");
            }

            DefaultVisit(comment);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Parse the HTML, and return it, based on options set.
        /// </summary>
        ///
        /// <remarks>
        /// Walks <c>Html</c> from position 0 through <c>EndPos</c> using an explicit stack of
        /// <c>IterationData</c> carriers rather than recursion. This is an iterator method, so no
        /// parsing happens until the returned sequence is enumerated. The options consulted here
        /// are <c>WrapRootTextNodes</c>, <c>IsDocument</c> and <c>GenerateOptionalElements</c>.
        /// </remarks>
        ///
        /// <returns>
        /// An enumerator of the top-level elements.
        /// </returns>

        protected IEnumerable<IDomObject> ParseImplementation()
        {
            int pos = 0;
            Stack<IterationData> stack = new Stack<IterationData>();

            while (pos <= EndPos)
            {
                // Every pass of the outer loop starts a fresh carrier at the current position.
                IterationData current = new IterationData();
                if (WrapRootTextNodes)
                {
                    current.WrapLiterals = true;
                }

                current.Reset(pos);
                stack.Push(current);

                while (stack.Count != 0)
                {
                    current = stack.Pop();

                    while (current.TokenizerState != TokenizerState.Finished && current.Pos <= EndPos)
                    {
                        switch (current.TokenizerState)
                        {
                            case TokenizerState.Default:
                                if (current.FindNextTag(Html))
                                {
                                    // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                                    current.TokenizerState = TokenizerState.TagStart;
                                }
                                break;

                            case TokenizerState.TagStart:
                                // Yield the literal reported by the carrier, if there is one.
                                IDomObject literal;
                                if (current.TryGetLiteral(this, out literal))
                                {
                                    yield return literal;
                                }

                                // Remembered so a mismatched closer or an implicit close can rewind to this tag.
                                int tagStartPos = current.Pos;

                                string newTag = current.GetTagOpener(Html);

                                if (newTag == String.Empty)
                                {
                                    // It's a tag closer. Make sure it's the right one.
                                    current.Pos = tagStartPos + 1;
                                    ushort closeTagId = HtmlData.Tokenize(current.GetCloseTag(Html));

                                    // Ignore empty tags, or closing tags found when no parent is open
                                    bool isProperClose = closeTagId == current.ParentTagID();
                                    if (closeTagId == 0)
                                    {
                                        // ignore empty tags
                                        continue;
                                    }
                                    else
                                    {
                                        // locate match for this closer up the hierarchy
                                        IterationData actualParent = null;

                                        if (!isProperClose)
                                        {
                                            actualParent = current.Parent;
                                            while (actualParent != null && actualParent.Element.NodeNameID != closeTagId)
                                            {
                                                actualParent = actualParent.Parent;
                                            }
                                        }
                                        // if no matching close tag was found up the tree, ignore it
                                        // otherwise always close this and repeat at the same position until the match is found
                                        if (!isProperClose && actualParent == null)
                                        {
                                            current.InsertionMode = InsertionMode.Invalid;
                                            continue;
                                        }
                                    }
                                    // element is closed

                                    // Only an element whose carrier has no parent of its own is yielded here.
                                    if (current.Parent.Parent == null)
                                    {
                                        yield return current.Parent.Element;
                                    }
                                    current.TokenizerState = TokenizerState.Finished;
                                    if (isProperClose)
                                    {
                                        current.Parent.Reset(current.Pos);
                                    }
                                    else
                                    {
                                        // Mismatched closer: rewind the parent to the start of this closer
                                        // so the same closer is examined again one level up.
                                        current.Parent.Reset(tagStartPos);
                                    }
                                    // already been returned before we added the children
                                    continue;
                                }
                                else if (newTag[0] == '!')
                                {
                                    // Ordinal comparisons: the position offsets below (+9, +4) assume the
                                    // prefix matched character for character, which culture-sensitive
                                    // comparison does not guarantee.
                                    IDomSpecialElement specialElement = null;
                                    if (newTag.StartsWith("!DOCTYPE", StringComparison.OrdinalIgnoreCase))
                                    {
                                        specialElement = new DomDocumentType();
                                        current.Element = specialElement;
                                    }
                                    else if (newTag.StartsWith("![CDATA[", StringComparison.OrdinalIgnoreCase))
                                    {
                                        specialElement = new DomCData();
                                        current.Element = specialElement;
                                        // skip past "<![CDATA[" (9 characters)
                                        current.Pos = tagStartPos + 9;
                                    }
                                    else
                                    {
                                        specialElement = new DomComment();
                                        current.Element = specialElement;
                                        if (newTag.StartsWith("!--", StringComparison.Ordinal))
                                        {
                                            ((DomComment)specialElement).IsQuoted = true;
                                            // skip past "<!--" (4 characters)
                                            current.Pos = tagStartPos + 4;
                                        }
                                        else
                                        {
                                            current.Pos = tagStartPos + 1;
                                        }
                                    }

                                    // Quoted comments end at "-->"; everything else ends at the next ">".
                                    string endTag = (current.Element is IDomComment && ((IDomComment)current.Element).IsQuoted) ? "-->" : ">";

                                    int tagEndPos = Html.Seek(endTag, current.Pos);
                                    if (tagEndPos < 0)
                                    {
                                        // if a tag is unclosed entirely, then just find a new line.
                                        // NOTE(review): Environment.NewLine is platform-specific, so input using a
                                        // different line ending will not match here -- confirm this is intended.
                                        tagEndPos = Html.Seek(System.Environment.NewLine, current.Pos);
                                    }
                                    if (tagEndPos < 0)
                                    {
                                        // Never closed, no newline - junk, treat it like such
                                        tagEndPos = EndPos;
                                    }

                                    specialElement.NonAttributeData = Html.SubstringBetween(current.Pos, tagEndPos);
                                    current.Pos = tagEndPos;
                                }
                                else
                                {
                                    // seems to be a new element tag, parse it.

                                    ushort newTagId = HtmlData.Tokenize(newTag);

                                    // Before we keep going see if this is an implicit close
                                    ushort parentTagId = current.ParentTagID();

                                    int lastPos = current.Pos;

                                    if (parentTagId == 0 && IsDocument)
                                    {
                                        // A document whose first tag is not <html> gets an html element created for it.
                                        if (newTagId != HtmlData.tagHTML)
                                        {
                                            current.Element = DomElement.Create(HtmlData.tagHTML);
                                            current = current.AddNewChild();
                                            parentTagId = HtmlData.tagHTML;
                                        }
                                    }

                                    if (parentTagId != 0)
                                    {
                                        ushort action = SpecialTagActionDelegate(parentTagId, newTagId);

                                        while (action != HtmlData.tagActionNothing)
                                        {
                                            if (action == HtmlData.tagActionClose)
                                            {
                                                // track the next parent up the chain
                                                var newNode = current.Parent;

                                                // same tag for a repeater like li occurred - treat like a close tag

                                                if (current.Parent.Parent == null)
                                                {
                                                    yield return current.Parent.Element;
                                                }

                                                // The parent is reset once, after this loop, for whichever carrier ends up current.
                                                current.TokenizerState = TokenizerState.Finished;

                                                if (newNode != null && newNode.Parent != null && newNode.Parent.Element != null)
                                                {
                                                    // Ask again against the grandparent: the new tag may close more than one level.
                                                    action = SpecialTagActionDelegate(newNode.Parent.Element.NodeNameID, newTagId);
                                                    if (action != HtmlData.tagActionNothing)
                                                    {
                                                        current = newNode;
                                                    }
                                                }
                                                else
                                                {
                                                    action = HtmlData.tagActionNothing;
                                                }
                                            }
                                            else
                                            {
                                                // Any other action value is passed to AddNewParent, but only when
                                                // optional-element generation is enabled.
                                                if (GenerateOptionalElements)
                                                {
                                                    stack.Push(current);
                                                    current = current.AddNewParent(action, lastPos);
                                                }
                                                action = HtmlData.tagActionNothing;
                                            }
                                        }
                                        if (current.TokenizerState == TokenizerState.Finished)
                                        {
                                            // Implicitly closed: rewind the parent so this opening tag is parsed again there.
                                            current.Parent.Reset(tagStartPos);
                                            continue;
                                        }
                                    }


                                    current.Element = DomElement.Create(newTagId);


                                    if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                                    {
                                        current.InsertionMode = InsertionMode.Text;
                                        current.TokenizerState = TokenizerState.Default;
                                    }

                                    // Parse attribute data
                                    while (current.Pos <= EndPos)
                                    {
                                        if (!current.GetTagAttribute(Html))
                                        {
                                            break;
                                        }
                                    }
                                }

                                IDomObject el;

                                // When FinishTagOpener returns true, keep this carrier on the stack and
                                // continue with a new child carrier.
                                if (current.FinishTagOpener(Html, out el))
                                {
                                    stack.Push(current);
                                    current = current.AddNewChild();
                                }

                                if (el != null)
                                {
                                    yield return el;
                                }

                                break;
                        }
                    }


                    // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because a top-level tag was unclosed.
                    // This will wrap up any straggling text and close any open tags after it.

                    if (current.TokenizerState != TokenizerState.Finished)
                    {
                        foreach (var closed in current.CloseElement(this))
                        {
                            yield return closed;
                        }
                    }
                }
                pos = current.Pos;
            }
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Creates a new <c>DomComment</c> whose <c>Data</c> is set to the supplied text.
 /// </summary>
 /// <param name="data">Value assigned to the new comment's <c>Data</c> property.</param>
 /// <returns>The newly created comment node.</returns>
 public virtual DomComment CreateComment(string data)
 {
     return new DomComment { Data = data };
 }