示例#1
0
 /// <summary>
 /// Positions the navigator on the first child of the current node.
 /// </summary>
 /// <returns>true when the current node has child nodes and the navigator moved to the first one; otherwise false, with the position left unchanged.</returns>
 public override bool MoveToFirstChild()
 {
     if (_currentnode.HasChildNodes)
     {
         // Index 0 of the child collection is the first child.
         _currentnode = _currentnode.ChildNodes[0];
         InternalTrace(">true");
         return true;
     }

     // Nothing to descend into: stay where we are.
     InternalTrace(">false");
     return false;
 }
示例#2
0
        /// <summary>
        /// Closes this node using <paramref name="endnode"/> as its end node and updates the
        /// owner document's bookkeeping and this node's inner/outer extents.
        /// </summary>
        /// <remarks>
        /// When the owner document's OptionAutoCloseOnEnd is off, every child that is still open is
        /// closed first with a synthetic closer node. If this node is already closed, nothing else happens.
        /// </remarks>
        /// <param name="endnode">The node that terminates this one. When it is this node itself, the extents are left as they are.</param>
        internal void CloseNode(HtmlNode endnode)
        {
            bool closeChildrenFirst = !_ownerdocument.OptionAutoCloseOnEnd;
            if (closeChildrenFirst && (_childnodes != null))
            {
                foreach (HtmlNode child in _childnodes)
                {
                    if (!child.Closed)
                    {
                        // Synthetic closer: a node that is its own end node (index -1).
                        HtmlNode fakeCloser = new HtmlNode(NodeType, _ownerdocument, -1);
                        fakeCloser._endnode = fakeCloser;
                        child.CloseNode(fakeCloser);
                    }
                }
            }

            if (Closed)
            {
                return;
            }

            _endnode = endnode;

            // The node is no longer pending, so drop it from the opened-node table (keyed by start index).
            if (_ownerdocument._openednodes != null)
            {
                _ownerdocument._openednodes.Remove(_outerstartindex);
            }

            // If this node is the one recorded under its name in the last-nodes table, unregister it.
            HtmlNode lastWithSameName = _ownerdocument._lastnodes[Name] as HtmlNode;
            if (lastWithSameName == this)
            {
                _ownerdocument._lastnodes.Remove(Name);
                _ownerdocument.UpdateLastParentNode();
            }

            // A self-closing node has no separate end tag from which to derive extents.
            if (endnode == this)
            {
                return;
            }

            // The inner section starts right after the current outer span and ends where the end node starts.
            int innerStart = _outerstartindex + _outerlength;
            _innerstartindex = innerStart;
            _innerlength = endnode._outerstartindex - innerStart;

            // The outer span now extends through the end of the end node.
            int endOfEndNode = endnode._outerstartindex + endnode._outerlength;
            _outerlength = endOfEndNode - _outerstartindex;
        }
示例#3
0
 /// <summary>
 /// Copies the given node into this one by delegating to the two-argument overload with <c>true</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the flag presumably requests that the subtree under the node is copied as well - confirm against the overload.
 /// </remarks>
 /// <param name="node">The node to copy from. May not be <c>null</c>.</param>
 public void CopyFrom(HtmlNode node)
 {
     const bool copySubtree = true;
     CopyFrom(node, copySubtree);
 }
示例#4
0
        /// <summary>
        /// Initializes an HtmlNode with its type, its owner document and its start index.
        /// </summary>
        /// <param name="type">The kind of node to create. Comment, Document and Text nodes receive a fixed name and are their own end node.</param>
        /// <param name="ownerdocument">The document that owns the node. Its opened-node table is read here.</param>
        /// <param name="index">Stored as the node's outer start index and used as the key in the owner's opened-node table. -1 marks a node that comes from public and is never registered there.</param>
        public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
        {
            _nodetype = type;
            _ownerdocument = ownerdocument;
            _outerstartindex = index;

            // These three node kinds get a fixed name and are self-terminated.
            if (type == HtmlNodeType.Comment)
            {
                Name = HtmlNodeTypeNameComment;
                _endnode = this;
            }
            else if (type == HtmlNodeType.Document)
            {
                Name = HtmlNodeTypeNameDocument;
                _endnode = this;
            }
            else if (type == HtmlNodeType.Text)
            {
                Name = HtmlNodeTypeNameText;
                _endnode = this;
            }

            bool hasSourceIndex = index != -1;

            // Register still-open nodes with the owner, keyed by index; -1 is never registered.
            if ((_ownerdocument._openednodes != null) && !Closed && hasSourceIndex)
            {
                _ownerdocument._openednodes.Add(index, this);
            }

            bool isCommentOrText = (type == HtmlNodeType.Comment) || (type == HtmlNodeType.Text);
            if (!hasSourceIndex && !isCommentOrText)
            {
                // innerhtml and outerhtml must be calculated
                _outerchanged = true;
                _innerchanged = true;
            }
        }
示例#5
0
        /// <summary>
        /// Replaces the child node <paramref name="oldChild"/> with <paramref name="newChild"/>.
        /// </summary>
        /// <param name="newChild">The node to put in the child list. When null, the call is forwarded to RemoveChild(oldChild).</param>
        /// <param name="oldChild">The node being replaced. When null, the call is forwarded to AppendChild(newChild).</param>
        /// <returns>
        /// <paramref name="newChild"/> after a replacement; otherwise whatever RemoveChild or AppendChild
        /// returned for the forwarded call.
        /// </returns>
        /// <exception cref="ArgumentException">oldChild is not found among this node's children.</exception>
        public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
        {
            if (newChild == null)
            {
                return RemoveChild(oldChild);
            }

            if (oldChild == null)
            {
                return AppendChild(newChild);
            }

            // With no child collection at all, oldChild cannot be a child.
            int position = (_childnodes == null) ? -1 : _childnodes[oldChild];
            if (position == -1)
            {
                throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
            }

            // A position other than -1 can only come from a non-null collection.
            _childnodes.Replace(position, newChild);

            // Unregister the old node's id, then register the new node's id.
            _ownerdocument.SetIdForNode(null, oldChild.GetId());
            _ownerdocument.SetIdForNode(newChild, newChild.GetId());

            _outerchanged = true;
            _innerchanged = true;
            return newChild;
        }
示例#6
0
        /// <summary>
        /// Loads the HTML document from the specified TextReader.
        /// </summary>
        /// <remarks>
        /// Reads the reader to its end, parses the text and, when OptionCheckSyntax is set, reports a
        /// TagNotClosed error for every node still listed as opened whose start tag flag is set.
        /// </remarks>
        /// <param name="reader">The TextReader used to feed the HTML data into the document. May not be null.</param>
        /// <exception cref="ArgumentNullException">reader is null.</exception>
        public void Load(TextReader reader)
        {
            // all Load methods pass down to this one
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            _onlyDetectEncoding = false;
            _openednodes = OptionCheckSyntax ? new Hashtable() : null;
            _nodesid = OptionUseIdAttribute ? new Hashtable() : null;

            StreamReader streamReader = reader as StreamReader;
            if (streamReader == null)
            {
                _streamencoding = null;
            }
            else
            {
                try
                {
                    // trigger bom read if needed
                    streamReader.Peek();
                }
                catch (Exception)
                {
                    // ignored on purpose
                }
                _streamencoding = streamReader.CurrentEncoding;
            }
            _declaredencoding = null;

            _text = reader.ReadToEnd();
            _documentnode = CreateNode(HtmlNodeType.Document, 0);
            Parse();

            if (!OptionCheckSyntax)
            {
                return;
            }

            foreach (HtmlNode node in _openednodes.Values)
            {
                // nodes without the start tag flag were already reported
                if (node._starttag)
                {
                    string html = string.Empty;
                    if (OptionExtractErrorSourceText)
                    {
                        html = node.OuterHtml;
                        if (html.Length > OptionExtractErrorSourceTextMaxLength)
                        {
                            html = html.Substring(0, OptionExtractErrorSourceTextMaxLength);
                        }
                    }

                    AddError(
                        HtmlParseErrorCode.TagNotClosed,
                        node._line, node._lineposition,
                        node._streamposition, html,
                        "End tag </" + node.Name + "> was not found");
                }
            }

            // we don't need this anymore
            _openednodes.Clear();
        }
示例#7
0
        /// <summary>
        /// Clones an HtmlNode (deep clone) and entitizes the clone: its attributes, and either its child
        /// nodes or, for a childless text node, its text.
        /// </summary>
        /// <param name="node">The node to entitize. It is cloned; the entitizing is applied to the clone.</param>
        /// <returns>An entitized cloned node.</returns>
        /// <exception cref="ArgumentNullException">node is null.</exception>
        public static HtmlNode Entitize(HtmlNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            HtmlNode clone = node.CloneNode(true);

            if (clone.HasAttributes)
            {
                Entitize(clone.Attributes);
            }

            if (clone.HasChildNodes)
            {
                Entitize(clone.ChildNodes);
            }
            else if (clone.NodeType == HtmlNodeType.Text)
            {
                // A childless text node carries its content in Text.
                HtmlTextNode textNode = (HtmlTextNode) clone;
                textNode.Text = Entitize(textNode.Text, true, true);
            }

            return clone;
        }
示例#8
0
 /// <summary>
 /// Puts the navigator back on the document node and resets the attribute index to -1.
 /// </summary>
 private void Reset()
 {
     // Called before the position changes; keep this order unless InternalTrace is
     // confirmed not to read navigator state.
     InternalTrace(null);
     _currentnode = _doc.DocumentNode;
     // NOTE(review): -1 presumably means "not positioned on an attribute" - confirm.
     _attindex = -1;
 }
示例#9
0
 /// <summary>
 /// Moves the last-parent pointer up the ancestor chain until it rests on a node that is not closed,
 /// falling back to the document node when the chain runs out.
 /// </summary>
 internal void UpdateLastParentNode()
 {
     while (_lastparentnode.Closed)
     {
         _lastparentnode = _lastparentnode.ParentNode;
         if (_lastparentnode == null)
         {
             // Walked past the topmost ancestor: fall back to the document node.
             _lastparentnode = _documentnode;
             return;
         }
     }
 }
示例#10
0
 /// <summary>
 /// Positions the navigator on the previous sibling of the current node.
 /// </summary>
 /// <returns>true when a previous sibling exists and the navigator moved to it; false when there is none, in which case the position is unchanged.</returns>
 public override bool MoveToPrevious()
 {
     HtmlNode previous = _currentnode.PreviousSibling;
     if (previous != null)
     {
         _currentnode = previous;
         InternalTrace(">true");
         return true;
     }

     InternalTrace(">false");
     return false;
 }
示例#11
0
 /// <summary>
 /// Moves the navigator to the document node of the underlying document.
 /// </summary>
 public override void MoveToRoot()
 {
     // Reposition first, then trace: the trace call runs with the navigator already on the document node.
     _currentnode = _doc.DocumentNode;
     InternalTrace(null);
 }
示例#12
0
 /// <summary>
 /// Positions the navigator on the parent of the current node.
 /// </summary>
 /// <returns>true when the current node has a parent and the navigator moved to it; otherwise false, with the position unchanged.</returns>
 public override bool MoveToParent()
 {
     HtmlNode parent = _currentnode.ParentNode;
     if (parent != null)
     {
         _currentnode = parent;
         InternalTrace(">true");
         return true;
     }

     InternalTrace(">false");
     return false;
 }
示例#13
0
 /// <summary>
 /// Positions the navigator on the next sibling of the current node.
 /// </summary>
 /// <returns>true when a next sibling exists and the navigator moved to it; false when there are no more siblings, in which case the position is unchanged.</returns>
 public override bool MoveToNext()
 {
     HtmlNode next = _currentnode.NextSibling;
     if (next == null)
     {
         InternalTrace(">false");
         return false;
     }

     // Trace shallow clones of the current and the next node before moving.
     // The expressions stay inside the calls so they are only evaluated as trace arguments.
     InternalTrace("_c=" + _currentnode.CloneNode(false).OuterHtml);
     InternalTrace("_n=" + next.CloneNode(false).OuterHtml);

     _currentnode = next;
     InternalTrace(">true");
     return true;
 }
示例#14
0
 /// <summary>
 /// Positions the navigator on the node the document returns for the given id.
 /// </summary>
 /// <param name="id">The id value to look up through the document's GetElementbyId.</param>
 /// <returns>true when a node was found and the navigator moved to it; otherwise false, with the position unchanged.</returns>
 public override bool MoveToId(string id)
 {
     InternalTrace("id=" + id);

     HtmlNode target = _doc.GetElementbyId(id);
     if (target != null)
     {
         _currentnode = target;
         InternalTrace(">true");
         return true;
     }

     InternalTrace(">false");
     return false;
 }
示例#15
0
        /// <summary>
        /// Looks for a charset declaration on a <c>meta http-equiv="content-type"</c> node and records it as
        /// the declared encoding.
        /// </summary>
        /// <remarks>
        /// Does nothing when OptionReadEncoding is off, when the node is not a <c>meta</c> element carrying
        /// that <c>http-equiv</c> value, or when its <c>content</c> attribute has no charset. A charset name
        /// the runtime does not recognise leaves the declared encoding null; no mismatch is reported then.
        /// </remarks>
        /// <param name="node">The node to inspect. Its attributes are expected to be populated already.</param>
        /// <exception cref="EncodingFoundException">
        /// Thrown with the declared encoding (possibly null) once a charset has been read while the
        /// document is only detecting the encoding.
        /// </exception>
        private void ReadDocumentEncoding(HtmlNode node)
        {
            if (!OptionReadEncoding)
                return;
            // format is
            // <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" />

            // when we append a child, we are in node end, so attributes are already populated
            if (node._namelength != 4) // quick check, avoids string alloc
                return;
            if (node.Name != "meta") // all nodes names are lowercase
                return;
            HtmlAttribute att = node.Attributes["http-equiv"];
            if (att == null)
                return;
            // Ordinal comparison: "content-type" is a protocol token, so culture-specific casing
            // rules (e.g. the Turkish dotless i) must not affect the match.
            if (!string.Equals(att.Value, "content-type", StringComparison.OrdinalIgnoreCase))
                return;
            HtmlAttribute content = node.Attributes["content"];
            if (content == null)
                return;

            string charset = NameValuePairList.GetNameValuePairsValue(content.Value, "charset");
            if (string.IsNullOrEmpty(charset))
                return;

            // The following check fixes the bug described at: http://htmlagilitypack.codeplex.com/WorkItem/View.aspx?WorkItemId=25273
            if (string.Equals(charset, "utf8", StringComparison.OrdinalIgnoreCase))
                charset = "utf-8";
            try
            {
                _declaredencoding = Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // Unknown or invalid charset name: treat it as "no declared encoding".
                _declaredencoding = null;
            }
            if (_onlyDetectEncoding)
            {
                throw new EncodingFoundException(_declaredencoding);
            }

            // Compare only when both encodings are known; the declared one is null after a failed
            // lookup above, and dereferencing it would throw a NullReferenceException.
            if ((_streamencoding != null) && (_declaredencoding != null))
            {
                if (_declaredencoding.WindowsCodePage != _streamencoding.WindowsCodePage)
                {
                    AddError(
                        HtmlParseErrorCode.CharsetMismatch,
                        _line, _lineposition,
                        _index, node.OuterHtml,
                        "Encoding mismatch between StreamEncoding: " +
                        _streamencoding.WebName + " and DeclaredEncoding: " +
                        _declaredencoding.WebName);
                }
            }
        }
示例#16
0
 /// <summary>
 /// Looks up the last node recorded under <paramref name="name"/> and returns it when it is usable as a
 /// resetter for <paramref name="node"/>.
 /// </summary>
 /// <param name="node">The node whose stream position the candidate is compared against.</param>
 /// <param name="name">The key to look up in the last-nodes table.</param>
 /// <returns>
 /// The recorded node when it exists, is not closed and its stream position is not lower than that of
 /// <paramref name="node"/>; otherwise null.
 /// </returns>
 private HtmlNode FindResetterNode(HtmlNode node, string name)
 {
     HtmlNode candidate = (HtmlNode)_lastnodes[name];

     bool unusable = (candidate == null)
                     || candidate.Closed
                     || (candidate._streamposition < node._streamposition);

     return unusable ? null : candidate;
 }
示例#17
0
        /// <summary>
        /// Detects the encoding of an HTML text provided on a TextReader.
        /// </summary>
        /// <remarks>
        /// The text is read to its end and parsed in detect-only mode; parsing is interrupted by an
        /// EncodingFoundException as soon as a charset declaration is read.
        /// </remarks>
        /// <param name="reader">The TextReader used to feed the HTML. May not be null.</param>
        /// <returns>The encoding carried by the EncodingFoundException, or null when parsing finishes without one being raised.</returns>
        /// <exception cref="ArgumentNullException">reader is null.</exception>
        public Encoding DetectEncoding(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }
            _onlyDetectEncoding = true;
            if (OptionCheckSyntax)
            {
                _openednodes = new Hashtable();
            }
            else
            {
                _openednodes = null;
            }

            if (OptionUseIdAttribute)
            {
                _nodesid = new Hashtable();
            }
            else
            {
                _nodesid = null;
            }

            StreamReader sr = reader as StreamReader;
            if (sr != null)
            {
                // StreamReader only auto-detects the encoding (BOM) on its first read, so
                // CurrentEncoding is not reliable before that. Peek forces the detection
                // without consuming any character.
                try
                {
                    sr.Peek();
                }
                catch (Exception)
                {
                    // best effort on purpose: a failing reader surfaces in ReadToEnd below
                }
                _streamencoding = sr.CurrentEncoding;
            }
            else
            {
                _streamencoding = null;
            }
            _declaredencoding = null;

            _text = reader.ReadToEnd();
            _documentnode = CreateNode(HtmlNodeType.Document, 0);

            // this is almost a hack, but it allows us not to muck with the original parsing code
            try
            {
                Parse();
            }
            catch (EncodingFoundException ex)
            {
                return ex.Encoding;
            }
            return null;
        }
示例#18
0
 /// <summary>
 /// Tells whether FindResetterNode yields a node for at least one of the given names.
 /// </summary>
 /// <param name="node">The node passed through to FindResetterNode.</param>
 /// <param name="names">The names to try, in order. May be null.</param>
 /// <returns>true as soon as one name yields a resetter node; false when none does or when <paramref name="names"/> is null.</returns>
 private bool FindResetterNodes(HtmlNode node, string[] names)
 {
     if (names != null)
     {
         foreach (string candidateName in names)
         {
             if (FindResetterNode(node, candidateName) != null)
             {
                 return true;
             }
         }
     }

     return false;
 }
示例#19
0
        /// <summary>
        /// Registers or unregisters a node in the id table under the lower-cased <paramref name="id"/>.
        /// </summary>
        /// <remarks>
        /// Does nothing when OptionUseIdAttribute is off, when the id table does not exist, or when
        /// <paramref name="id"/> is null.
        /// </remarks>
        /// <param name="node">The node to store under the id, or null to remove the entry for that id.</param>
        /// <param name="id">The id to use as key. It is lower-cased with ToLower() before use.</param>
        internal void SetIdForNode(HtmlNode node, string id)
        {
            bool nothingToDo = !OptionUseIdAttribute || (_nodesid == null) || (id == null);
            if (nothingToDo)
            {
                return;
            }

            string key = id.ToLower();
            if (node != null)
            {
                _nodesid[key] = node;
            }
            else
            {
                _nodesid.Remove(key);
            }
        }
示例#20
0
        /// <summary>
        /// Closes the last recorded node named <paramref name="name"/> when it is still open and no
        /// resetter node is found for it.
        /// </summary>
        /// <param name="name">The key to look up in the last-nodes table.</param>
        /// <param name="resetters">Names of the resetter nodes to look for. When null, nothing is done.</param>
        private void FixNestedTag(string name, string[] resetters)
        {
            if (resetters == null)
            {
                return;
            }

            // if we find a previous unclosed same name node, without a resetter node between, we must close it
            HtmlNode previous = (HtmlNode)_lastnodes[name];
            if ((previous == null) || previous.Closed)
            {
                return;
            }

            // a resetter node was found: leave the previous node alone
            if (FindResetterNodes(previous, resetters))
            {
                return;
            }

            // close the previous node with a fake closer node that is its own end node
            HtmlNode fakeCloser = new HtmlNode(previous.NodeType, this, -1);
            fakeCloser._endnode = fakeCloser;
            previous.CloseNode(fakeCloser);
        }
示例#21
0
        /// <summary>
        /// Inserts the specified node immediately before the specified reference node.
        /// </summary>
        /// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
        /// <param name="refChild">The reference node; <paramref name="newChild"/> is placed at its position. When null, the call is forwarded to AppendChild(newChild).</param>
        /// <returns>
        /// <paramref name="newChild"/>, or whatever AppendChild returned for the forwarded call. When both
        /// arguments are the same node, it is returned without any change.
        /// </returns>
        /// <exception cref="ArgumentNullException">newChild is null.</exception>
        /// <exception cref="ArgumentException">refChild is not found among this node's children.</exception>
        public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
        {
            if (newChild == null)
            {
                throw new ArgumentNullException("newChild");
            }

            if (refChild == null)
            {
                return AppendChild(newChild);
            }

            if (newChild == refChild)
            {
                return newChild;
            }

            // With no child collection at all, refChild cannot be a child.
            int position = (_childnodes == null) ? -1 : _childnodes[refChild];
            if (position == -1)
            {
                throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
            }

            // A position other than -1 can only come from a non-null collection.
            _childnodes.Insert(position, newChild);

            _ownerdocument.SetIdForNode(newChild, newChild.GetId());
            _outerchanged = true;
            _innerchanged = true;
            return newChild;
        }
示例#22
0
        private void Parse()
        {
            int lastquote = 0;
            if (OptionComputeChecksum)
            {
                _crc32 = new Crc32();
            }

            _lastnodes = new Hashtable();
            _c = 0;
            _fullcomment = false;
            _parseerrors = new List<HtmlParseError>();
            _line = 1;
            _lineposition = 1;
            _maxlineposition = 1;

            _state = ParseState.Text;
            _oldstate = _state;
            _documentnode._innerlength = _text.Length;
            _documentnode._outerlength = _text.Length;
            _remainderOffset = _text.Length;

            _lastparentnode = _documentnode;
            _currentnode = CreateNode(HtmlNodeType.Text, 0);
            _currentattribute = null;

            _index = 0;
            PushNodeStart(HtmlNodeType.Text, 0);
            while (_index < _text.Length)
            {
                _c = _text[_index];
                IncrementPosition();

                switch (_state)
                {
                    case ParseState.Text:
                        if (NewCheck())
                            continue;
                        break;

                    case ParseState.WhichTag:
                        if (NewCheck())
                            continue;
                        if (_c == '/')
                        {
                            PushNodeNameStart(false, _index);
                        }
                        else
                        {
                            PushNodeNameStart(true, _index - 1);
                            DecrementPosition();
                        }
                        _state = ParseState.Tag;
                        break;

                    case ParseState.Tag:
                        if (NewCheck())
                            continue;
                        if (IsWhiteSpace(_c))
                        {
                            PushNodeNameEnd(_index - 1);
                            if (_state != ParseState.Tag)
                                continue;
                            _state = ParseState.BetweenAttributes;
                            continue;
                        }
                        if (_c == '/')
                        {
                            PushNodeNameEnd(_index - 1);
                            if (_state != ParseState.Tag)
                                continue;
                            _state = ParseState.EmptyTag;
                            continue;
                        }
                        if (_c == '>')
                        {
                            PushNodeNameEnd(_index - 1);
                            if (_state != ParseState.Tag)
                                continue;
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }
                            if (_state != ParseState.Tag)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }
                        break;

                    case ParseState.BetweenAttributes:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                            continue;

                        if ((_c == '/') || (_c == '?'))
                        {
                            _state = ParseState.EmptyTag;
                            continue;
                        }

                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }

                            if (_state != ParseState.BetweenAttributes)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }

                        PushAttributeNameStart(_index - 1);
                        _state = ParseState.AttributeName;
                        break;

                    case ParseState.EmptyTag:
                        if (NewCheck())
                            continue;

                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, true))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }

                            if (_state != ParseState.EmptyTag)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        _state = ParseState.BetweenAttributes;
                        break;

                    case ParseState.AttributeName:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                        {
                            PushAttributeNameEnd(_index - 1);
                            _state = ParseState.AttributeBeforeEquals;
                            continue;
                        }
                        if (_c == '=')
                        {
                            PushAttributeNameEnd(_index - 1);
                            _state = ParseState.AttributeAfterEquals;
                            continue;
                        }
                        if (_c == '>')
                        {
                            PushAttributeNameEnd(_index - 1);
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeName)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        break;

                    case ParseState.AttributeBeforeEquals:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                            continue;
                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeBeforeEquals)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        if (_c == '=')
                        {
                            _state = ParseState.AttributeAfterEquals;
                            continue;
                        }
                        // no equals, no whitespace, it's a new attrribute starting
                        _state = ParseState.BetweenAttributes;
                        DecrementPosition();
                        break;

                    case ParseState.AttributeAfterEquals:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                            continue;

                        if ((_c == '\'') || (_c == '"'))
                        {
                            _state = ParseState.QuotedAttributeValue;
                            PushAttributeValueStart(_index, _c);
                            lastquote = _c;
                            continue;
                        }
                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeAfterEquals)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        PushAttributeValueStart(_index - 1);
                        _state = ParseState.AttributeValue;
                        break;

                    case ParseState.AttributeValue:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                        {
                            PushAttributeValueEnd(_index - 1);
                            _state = ParseState.BetweenAttributes;
                            continue;
                        }

                        if (_c == '>')
                        {
                            PushAttributeValueEnd(_index - 1);
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeValue)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        break;

                    case ParseState.QuotedAttributeValue:
                        if (_c == lastquote)
                        {
                            PushAttributeValueEnd(_index - 1);
                            _state = ParseState.BetweenAttributes;
                            continue;
                        }
                        if (_c == '<')
                        {
                            if (_index < _text.Length)
                            {
                                if (_text[_index] == '%')
                                {
                                    _oldstate = _state;
                                    _state = ParseState.ServerSideCode;
                                    continue;
                                }
                            }
                        }
                        break;

                    case ParseState.Comment:
                        if (_c == '>')
                        {
                            if (_fullcomment)
                            {
                                if ((_text[_index - 2] != '-') ||
                                    (_text[_index - 3] != '-'))
                                {
                                    continue;
                                }
                            }
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = _text.Length;
                                break;
                            }
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        break;

                    case ParseState.ServerSideCode:
                        if (_c == '%')
                        {
                            if (_index < _text.Length)
                            {
                                if (_text[_index] == '>')
                                {
                                    switch (_oldstate)
                                    {
                                        case ParseState.AttributeAfterEquals:
                                            _state = ParseState.AttributeValue;
                                            break;

                                        case ParseState.BetweenAttributes:
                                            PushAttributeNameEnd(_index + 1);
                                            _state = ParseState.BetweenAttributes;
                                            break;

                                        default:
                                            _state = _oldstate;
                                            break;
                                    }
                                    IncrementPosition();
                                }
                            }
                        }
                        break;

                    case ParseState.PcData:
                        // look for </tag + 1 char

                        // check buffer end
                        if ((_currentnode._namelength + 3) <= (_text.Length - (_index - 1)))
                        {
                            if (string.Compare(_text.Substring(_index - 1, _currentnode._namelength + 2),
                                               "</" + _currentnode.Name, true) == 0)
                            {
                                int c = _text[_index - 1 + 2 + _currentnode.Name.Length];
                                if ((c == '>') || (IsWhiteSpace(c)))
                                {
                                    // add the script as a text node
                                    HtmlNode script = CreateNode(HtmlNodeType.Text,
                                                                 _currentnode._outerstartindex +
                                                                 _currentnode._outerlength);
                                    script._outerlength = _index - 1 - script._outerstartindex;
                                    _currentnode.AppendChild(script);

                                    PushNodeStart(HtmlNodeType.Element, _index - 1);
                                    PushNodeNameStart(false, _index - 1 + 2);
                                    _state = ParseState.Tag;
                                    IncrementPosition();
                                }
                            }
                        }
                        break;
                }
            }

            // finish the current work
            if (_currentnode._namestartindex > 0)
            {
                PushNodeNameEnd(_index);
            }
            PushNodeEnd(_index, false);

            // we don't need this anymore
            _lastnodes.Clear();
        }
示例#23
0
        /// <summary>
        /// Removes the specified child node, optionally re-parenting its own children
        /// to this node so that they take the place of the removed child.
        /// </summary>
        /// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
        /// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
        /// <returns>The node removed.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is <c>null</c>.</exception>
        public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
        {
            if (oldChild == null)
            {
                throw new ArgumentNullException("oldChild");
            }

            if ((oldChild._childnodes != null) && keepGrandChildren)
            {
                // insertion anchor: starts at the sibling that precedes the removed child
                // (may be null when oldChild has no previous sibling; InsertAfter decides
                // how a null reference node is handled)
                HtmlNode prev = oldChild.PreviousSibling;

                // reroute grand children to ourselves
                foreach (HtmlNode grandchild in oldChild._childnodes)
                {
                    InsertAfter(grandchild, prev);

                    // advance the anchor: inserting every grandchild after the same
                    // fixed sibling would leave them in reverse order
                    prev = grandchild;
                }
            }
            RemoveChild(oldChild);
            _outerchanged = true;
            _innerchanged = true;
            return oldChild;
        }
示例#24
0
 /// <summary>
 /// Initializes a new HTML document and creates its document node at index 0.
 /// </summary>
 public HtmlDocument()
 {
     const int documentNodeIndex = 0;
     _documentnode = CreateNode(HtmlNodeType.Document, documentNodeIndex);
 }
示例#25
0
 /// <summary>
 /// Writes the attributes of an HTML node to an XML writer, one
 /// <c>WriteAttributeString</c> call per attribute.
 /// </summary>
 /// <param name="writer">The XML writer receiving the attributes.</param>
 /// <param name="node">The HTML node whose attributes are written.</param>
 internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
 {
     if (node.HasAttributes)
     {
         // Hashitems is enumerated (rather than the attribute list itself)
         // to make sure each attribute is written only once.
         foreach (HtmlAttribute attribute in node.Attributes.Hashitems.Values)
         {
             writer.WriteAttributeString(attribute.XmlName, attribute.Value);
         }
     }
 }
示例#26
0
        /// <summary>
        /// Finishes the node currently being parsed: fixes its outer length, attaches it
        /// to the last parent node where applicable, and closes it when required.
        /// </summary>
        /// <param name="index">The text index at which the current node ends.</param>
        /// <param name="close">true to close the current node even if it is a start tag.</param>
        /// <returns>
        /// true if parsing should continue; false if the stopper node
        /// (<c>OptionStopperNodeName</c>) was reached and parsing must stop.
        /// </returns>
        private bool PushNodeEnd(int index, bool close)
        {
            _currentnode._outerlength = index - _currentnode._outerstartindex;

            if ((_currentnode._nodetype == HtmlNodeType.Text) ||
                (_currentnode._nodetype == HtmlNodeType.Comment))
            {
                // forget about void nodes
                if (_currentnode._outerlength > 0)
                {
                    // text and comment nodes have no separate inner section:
                    // the inner span is the same as the outer span
                    _currentnode._innerlength = _currentnode._outerlength;
                    _currentnode._innerstartindex = _currentnode._outerstartindex;
                    if (_lastparentnode != null)
                    {
                        _lastparentnode.AppendChild(_currentnode);
                    }
                }
            }
            else
            {
                if ((_currentnode._starttag) && (_lastparentnode != _currentnode))
                {
                    // add to parent node
                    if (_lastparentnode != null)
                    {
                        _lastparentnode.AppendChild(_currentnode);
                    }

                    ReadDocumentEncoding(_currentnode);

                    // remember last node of this kind
                    HtmlNode prev = (HtmlNode)_lastnodes[_currentnode.Name];
                    _currentnode._prevwithsamename = prev;
                    _lastnodes[_currentnode.Name] = _currentnode;

                    // change parent?
                    if ((_currentnode.NodeType == HtmlNodeType.Document) ||
                        (_currentnode.NodeType == HtmlNodeType.Element))
                    {
                        _lastparentnode = _currentnode;
                    }

                    // CDATA elements switch the parser to PcData state and are left open
                    if (HtmlNode.IsCDataElement(CurrentNodeName()))
                    {
                        _state = ParseState.PcData;
                        return true;
                    }

                    // closed and empty elements are closed right away
                    if ((HtmlNode.IsClosedElement(_currentnode.Name)) ||
                        (HtmlNode.IsEmptyElement(_currentnode.Name)))
                    {
                        close = true;
                    }
                }
            }

            if ((close) || (!_currentnode._starttag))
            {
                // Tag names are identifiers, not linguistic text: compare them ordinally so the
                // stopper match does not depend on the current culture (e.g. Turkish 'i'/'I').
                if ((OptionStopperNodeName != null) && (_remainder == null) &&
                    (string.Compare(_currentnode.Name, OptionStopperNodeName,
                                    System.StringComparison.OrdinalIgnoreCase) == 0))
                {
                    // keep everything from the stopper position onwards as the remainder
                    _remainderOffset = index;
                    _remainder = _text.Substring(_remainderOffset);
                    CloseCurrentNode();
                    return false; // stop parsing
                }
                CloseCurrentNode();
            }
            return true;
        }
示例#27
0
        /// <summary>
        /// Appends a node after the last child of this node and registers its id
        /// with the owner document.
        /// </summary>
        /// <param name="newChild">The node to append. Must not be null.</param>
        /// <returns>The node that was appended.</returns>
        public HtmlNode AppendChild(HtmlNode newChild)
        {
            if (newChild != null)
            {
                ChildNodes.Append(newChild);
                _ownerdocument.SetIdForNode(newChild, newChild.GetId());

                // both the inner and the outer content of this node are now out of date
                _innerchanged = true;
                _outerchanged = true;
                return newChild;
            }

            throw new ArgumentNullException("newChild");
        }
示例#28
0
 /// <summary>
 /// Creates a node of the given type at the given index, makes it the current node
 /// and stamps it with the parser's current line, line position and stream position.
 /// </summary>
 /// <param name="type">The type of node to start.</param>
 /// <param name="index">The index at which the node starts.</param>
 private void PushNodeStart(HtmlNodeType type, int index)
 {
     HtmlNode started = CreateNode(type, index);
     _currentnode = started;

     started._line = _line;
     started._lineposition = _lineposition;
     switch (type)
     {
         case HtmlNodeType.Element:
             // element nodes are recorded one position earlier than the current one
             started._lineposition--;
             break;
     }
     started._streamposition = index;
 }
示例#29
0
        /// <summary>
        /// Copies the content of another node into this node: its attributes always,
        /// and its child nodes when a deep copy is requested.
        /// </summary>
        /// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
        /// <param name="deep">true to recursively clone the subtree under the specified node, false to clone only the node itself.</param>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is <c>null</c>.</exception>
        public void CopyFrom(HtmlNode node, bool deep)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // replace our attributes with a copy of the source node's attributes
            Attributes.RemoveAll();
            if (node.HasAttributes)
            {
                foreach (HtmlAttribute att in node.Attributes)
                {
                    SetAttributeValue(att.Name, att.Value);
                }
            }

            // Only a deep copy touches the children. The condition used to be inverted
            // (!deep), which cloned the subtree exactly when the caller asked for a
            // shallow copy and skipped it when a deep copy was requested.
            if (deep)
            {
                RemoveAllChildren();
                if (node.HasChildNodes)
                {
                    foreach (HtmlNode child in node.ChildNodes)
                    {
                        AppendChild(child.CloneNode(true));
                    }
                }
            }
        }
示例#30
0
        /// <summary>
        /// Repositions this navigator on the node (and attribute index) that another
        /// HtmlNodeNavigator is currently positioned on.
        /// </summary>
        /// <param name="other">The navigator positioned on the node to move to.</param>
        /// <returns>
        /// true if the move succeeded; false if <paramref name="other"/> is not an
        /// HtmlNodeNavigator or belongs to a different document, in which case the
        /// position of this navigator is unchanged.
        /// </returns>
        public override bool MoveTo(XPathNavigator other)
        {
            HtmlNodeNavigator source = other as HtmlNodeNavigator;
            if (source == null)
            {
                InternalTrace(">false (nav is not an HtmlNodeNavigator)");
                return false;
            }

            InternalTrace("moveto oid=" + source.GetHashCode()
                          + ", n:" + source._currentnode.Name
                          + ", a:" + source._attindex);

            bool sameDocument = source._doc == _doc;
            if (!sameDocument)
            {
                // we don't know how to handle that
                InternalTrace(">false (???)");
                return false;
            }

            // adopt the other navigator's position
            _currentnode = source._currentnode;
            _attindex = source._attindex;
            InternalTrace(">true");
            return true;
        }