Пример #1
0
        /// <summary>
        /// Adds, replaces or removes the entry for <paramref name="id"/> in the document's id-to-node map.
        /// </summary>
        /// <param name="node">
        /// The node to store under the id, or <c>null</c> to remove the entry for that id.
        /// </param>
        /// <param name="id">
        /// The id value. It is lower-cased before being used as the map key; a <c>null</c> id is ignored.
        /// </param>
        /// <remarks>
        /// Nothing happens when <c>OptionUseIdAttribute</c> is false or when the map itself is <c>null</c>.
        /// </remarks>
        internal void SetIdForNode(HtmlNode node, string id)
        {
            bool nothingToDo = !this.OptionUseIdAttribute || (this.Nodesid == null) || (id == null);
            if (nothingToDo)
            {
                return;
            }

            // Keys are stored lower-cased, so the same normalisation is applied for add and remove.
            string key = id.ToLower();

            if (node != null)
            {
                this.Nodesid[key] = node;
                return;
            }

            this.Nodesid.Remove(key);
        }
Пример #2
0
        /// <summary>
        /// Marks this node as closed by <paramref name="htmlendnode"/> and updates the owner document's
        /// bookkeeping and this node's inner/outer text spans.
        /// </summary>
        /// <param name="htmlendnode">
        /// The node that terminates this one. When it is this node itself, the span recalculation is skipped.
        /// </param>
        /// <remarks>
        /// When the owner document's <c>OptionAutoCloseOnEnd</c> is false, every child that is still open is
        /// closed first using a synthetic end node. If this node is already closed, only that child pass runs.
        /// </remarks>
        internal void CloseNode(HtmlNode htmlendnode)
        {
            // Force-close open children unless the document is configured to auto-close at the end.
            if (!this.OwnerDocument.OptionAutoCloseOnEnd && (this.childnodes != null))
            {
                foreach (HtmlNode openChild in this.childnodes)
                {
                    if (!openChild.Closed)
                    {
                        // Synthetic closer: a node that acts as its own end node.
                        HtmlNode fakeCloser = new HtmlNode(this.NodeType, this.OwnerDocument, -1);
                        fakeCloser.EndNode = fakeCloser;
                        openChild.CloseNode(fakeCloser);
                    }
                }
            }

            if (this.Closed)
            {
                return;
            }

            this.EndNode = htmlendnode;

            // This node is no longer open, so drop it from the open-node table when one exists.
            if (this.OwnerDocument.Openednodes != null)
            {
                this.OwnerDocument.Openednodes.Remove(this.OuterStartIndex);
            }

            // If this node is the most recent one recorded under its name, forget it and
            // let the document pick a new last parent.
            HtmlNode lastOfName = this.OwnerDocument.LastNodes.GetDictionaryValueOrNull(this.NodeName);
            if (lastOfName == this)
            {
                this.OwnerDocument.LastNodes.Remove(this.NodeName);
                this.OwnerDocument.UpdateLastParentNode();
            }

            // A self-closing node has no inner section to compute.
            if (htmlendnode == this)
            {
                return;
            }

            // The inner section starts right after the start tag and runs up to the end node.
            int innerStart = this.OuterStartIndex + this.OuterLength;
            this.InnerStartIndex = innerStart;
            this.InnerLength = htmlendnode.OuterStartIndex - innerStart;

            // The outer section now spans from the start tag through the end node.
            this.OuterLength = (htmlendnode.OuterStartIndex + htmlendnode.OuterLength) - this.OuterStartIndex;
        }
Пример #3
0
        /// <summary>
        /// Copies the attributes, and optionally the child nodes, of another node into this node.
        /// </summary>
        /// <param name="node">
        /// The node to copy from. May not be <c>null</c>.
        /// </param>
        /// <param name="deep">
        /// true to also replace this node's children with deep clones of the source node's children;
        /// false to copy only the attributes and leave this node's children untouched.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="node"/> is <c>null</c>.
        /// </exception>
        /// <remarks>
        /// This node's existing attributes are always removed before the source attributes are applied.
        /// Copying a node from itself is a no-op.
        /// </remarks>
        public void CopyFrom(HtmlNode node, bool deep = true)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // Clearing this node first would destroy the very data being copied when the
            // source and the destination are the same instance.
            if (object.ReferenceEquals(node, this))
            {
                return;
            }

            this.Attributes.RemoveAll();
            if (node.HasAttributes)
            {
                foreach (HtmlAttribute att in node.Attributes)
                {
                    this.SetAttributeValue(att.Name, att.Value);
                }
            }

            // Children are only copied for a deep copy (the original test was inverted).
            if (!deep)
            {
                return;
            }

            this.RemoveAllChildren();
            if (node.HasChildNodes)
            {
                foreach (HtmlNode child in node.ChildNodes)
                {
                    this.AppendChild(child.CloneNode(true));
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Inserts a node into this node's child list directly in front of a reference child.
        /// </summary>
        /// <param name="newChild">
        /// The node to insert. May not be <c>null</c>.
        /// </param>
        /// <param name="refChild">
        /// The existing child in front of which <paramref name="newChild"/> is placed.
        /// When <c>null</c>, the new node is appended instead.
        /// </param>
        /// <returns>
        /// The inserted node (or the result of <c>AppendChild</c> when <paramref name="refChild"/> is <c>null</c>).
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="newChild"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="refChild"/> is not found among this node's children.
        /// </exception>
        public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
        {
            if (newChild == null)
            {
                throw new ArgumentNullException("newChild");
            }

            if (refChild == null)
            {
                return this.AppendChild(newChild);
            }

            // Inserting a node before itself changes nothing.
            if (newChild == refChild)
            {
                return newChild;
            }

            // Without a child list the reference node cannot be a child; -1 signals "not found".
            int position = (this.childnodes != null) ? this.childnodes[refChild] : -1;
            if (position == -1)
            {
                throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
            }

            // A valid position implies the child list exists.
            this.childnodes.Insert(position, newChild);

            this.OwnerDocument.SetIdForNode(newChild, newChild.GetId());
            this.OuterChanged = true;
            this.InnerChanged = true;
            return newChild;
        }
Пример #5
0
        /// <summary>
        /// Replaces the child node <paramref name="oldChild"/> with <paramref name="newChild"/>.
        /// </summary>
        /// <param name="newChild">
        /// The node to put in the child list. When <c>null</c>, <paramref name="oldChild"/> is simply removed.
        /// </param>
        /// <param name="oldChild">
        /// The child being replaced. When <c>null</c>, <paramref name="newChild"/> is appended instead.
        /// </param>
        /// <returns>
        /// <paramref name="newChild"/> after a replacement; otherwise the result of <c>RemoveChild</c>
        /// (null <paramref name="newChild"/>) or <c>AppendChild</c> (null <paramref name="oldChild"/>).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="oldChild"/> is not found among this node's children.
        /// </exception>
        public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
        {
            if (newChild == null)
            {
                return this.RemoveChild(oldChild);
            }

            if (oldChild == null)
            {
                return this.AppendChild(newChild);
            }

            // Without a child list the old node cannot be a child; -1 signals "not found".
            int position = (this.childnodes != null) ? this.childnodes[oldChild] : -1;
            if (position == -1)
            {
                throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
            }

            // A valid position implies the child list exists.
            this.childnodes.Replace(position, newChild);

            // Unregister the outgoing node's id, then register the incoming one.
            this.OwnerDocument.SetIdForNode(null, oldChild.GetId());
            this.OwnerDocument.SetIdForNode(newChild, newChild.GetId());
            this.OuterChanged = true;
            this.InnerChanged = true;
            return newChild;
        }
Пример #6
0
        /// <summary>
        /// Moves the navigator to the parent of the current node.
        /// </summary>
        /// <returns>
        /// true if the current node has a parent and the navigator moved to it; otherwise false,
        /// in which case the position is unchanged.
        /// </returns>
        public override bool MoveToParent()
        {
            HtmlNode parent = this.currentnode.ParentNode;
            bool hasParent = parent != null;

            if (hasParent)
            {
                this.currentnode = parent;
            }

            return hasParent;
        }
Пример #7
0
 /// <summary>
 /// Repositions the navigator on the document node of the underlying HTML document.
 /// </summary>
 public override void MoveToRoot()
 {
     HtmlNode root = this.htmlDocument.DocumentNode;
     this.currentnode = root;
 }
Пример #8
0
        /// <summary>
        /// Finishes the node currently being parsed: fixes its outer length, attaches it to the last
        /// parent node, records it as the latest node of its name and closes it when required.
        /// </summary>
        /// <param name="index">The text index at which the current node ends.</param>
        /// <param name="close">true to close the current node even though it is a start tag.</param>
        /// <returns>
        /// false when the configured stopper node has just been reached and parsing must stop;
        /// otherwise true.
        /// </returns>
        private bool PushNodeEnd(int index, bool close)
        {
            this.currentnode.OuterLength = index - this.currentnode.OuterStartIndex;

            bool isTextOrComment = (this.currentnode.NodeType == HtmlNodeType.Text)
                                   || (this.currentnode.NodeType == HtmlNodeType.Comment);

            if (isTextOrComment)
            {
                // Zero-length text/comment nodes are dropped; others have identical inner and outer spans.
                if (this.currentnode.OuterLength > 0)
                {
                    this.currentnode.InnerLength = this.currentnode.OuterLength;
                    this.currentnode.InnerStartIndex = this.currentnode.OuterStartIndex;
                    if (this.lastparentnode != null)
                    {
                        this.lastparentnode.AppendChild(this.currentnode);
                    }
                }
            }
            else if (this.currentnode.StartTag && (this.lastparentnode != this.currentnode))
            {
                // Attach the new start tag to its parent.
                if (this.lastparentnode != null)
                {
                    this.lastparentnode.AppendChild(this.currentnode);
                }

                this.ReadDocumentEncoding(this.currentnode);

                // Chain to the previous node with the same name, then become the latest of that name.
                this.currentnode.PreviousWithSameName =
                    this.LastNodes.GetDictionaryValueOrNull(this.currentnode.NodeName);
                this.LastNodes[this.currentnode.NodeName] = this.currentnode;

                // Documents and elements become the parent for whatever follows.
                bool canHoldChildren = (this.currentnode.NodeType == HtmlNodeType.Document)
                                       || (this.currentnode.NodeType == HtmlNodeType.Element);
                if (canHoldChildren)
                {
                    this.lastparentnode = this.currentnode;
                }

                // CDATA-style elements switch the parser to PcData and stay open.
                if (HtmlNode.IsCDataElement(this.CurrentNodeName()))
                {
                    this.state = ParseState.PcData;
                    return true;
                }

                if (HtmlNode.IsClosedElement(this.currentnode.NodeName) || HtmlNode.IsEmptyElement(this.currentnode.NodeName))
                {
                    close = true;
                }
            }

            // An open start tag that was not asked to close stays open.
            if (!close && this.currentnode.StartTag)
            {
                return true;
            }

            // The first time the stopper node is met, keep the rest of the text as the remainder.
            bool reachedStopper = (this.OptionStopperNodeName != null) && (this.Remainder == null)
                                  &&
                                  (string.Compare(this.currentnode.NodeName, this.OptionStopperNodeName, StringComparison.OrdinalIgnoreCase) == 0);
            if (reachedStopper)
            {
                this.RemainderOffset = index;
                this.Remainder = this.Text.Substring(this.RemainderOffset);
            }

            this.CloseCurrentNode();

            // false tells the caller to stop parsing
            return !reachedStopper;
        }
Пример #9
0
        /// <summary>
        /// Creates a new node of the given type, makes it the current node and stamps it with the
        /// parser's current line, line position and stream position.
        /// </summary>
        /// <param name="type">The type of node to create.</param>
        /// <param name="index">The text index at which the node starts; also stored as its stream position.</param>
        private void PushNodeStart(HtmlNodeType type, int index)
        {
            HtmlNode started = this.CreateNode(type, index);
            this.currentnode = started;

            started.Line = this.line;

            // Element nodes are recorded one column earlier than the parser's current column.
            started.LinePosition = (type == HtmlNodeType.Element)
                ? this.lineposition - 1
                : this.lineposition;

            started.StreamPosition = index;
        }
Пример #10
0
        /// <summary>
        /// Closes the previous, still-open node that has the same name as the current node, unless a
        /// resetter node has been opened since.
        /// </summary>
        /// <param name="name">
        /// The tag name being fixed. Kept for signature compatibility; the previous node is looked up
        /// by the current node's name (presumably the same value — confirm against the caller).
        /// </param>
        /// <param name="resetters">
        /// Names of the nodes that cancel the implicit close. When <c>null</c>, nothing is done.
        /// </param>
        private void FixNestedTag(string name, IEnumerable<string> resetters)
        {
            if (resetters == null)
            {
                return;
            }

            HtmlNode prev = this.LastNodes.GetDictionaryValueOrNull(this.currentnode.NodeName);

            // Only a previous, still-open node of the same name needs closing. The closed state is
            // read from the node just found rather than through a second dictionary lookup by
            // another key, which could throw KeyNotFoundException if the two keys ever differed.
            if (prev == null || prev.Closed)
            {
                return;
            }

            // A resetter node found after prev means the nesting is legitimate: leave it alone.
            if (this.FindResetterNodes(prev, resetters))
            {
                return;
            }

            // Close prev with a synthetic end node that acts as its own end node.
            var close = new HtmlNode(prev.NodeType, this, -1);
            close.EndNode = close;
            prev.CloseNode(close);
        }
Пример #11
0
        /// <summary>
        /// Parses <c>Text</c> one character at a time with a state machine, creating nodes and
        /// attributes as tags, attributes, comments and server-side code blocks are recognised.
        /// </summary>
        /// <remarks>
        /// All per-parse state (last-node table, parse errors, line counters, parse state) is reset
        /// at the start, and the last-node table is cleared again once parsing is finished.
        /// </remarks>
        private void Parse()
        {
            // quote character that opened the quoted attribute value currently being read
            int lastquote = 0;

            // reset per-parse state
            this.LastNodes = new Dictionary<string, HtmlNode>();
            this.currentchar = 0;
            this.fullcomment = false;
            this.parseerrors = new List<HtmlParseError>();
            this.line = 1;
            this.lineposition = 1;
            this.maxlineposition = 1;

            this.state = ParseState.Text;
            this.oldstate = this.state;

            // the document node spans the whole text
            this.documentnode.InnerLength = this.Text.Length;
            this.documentnode.OuterLength = this.Text.Length;
            this.RemainderOffset = this.Text.Length;

            this.lastparentnode = this.documentnode;

            // NOTE(review): PushNodeStart is called right below with the same arguments, so this
            // first node looks redundant — confirm before removing.
            this.currentnode = this.CreateNode(HtmlNodeType.Text, 0);
            this.currentattribute = null;

            this.cuurrentindex = 0;
            this.PushNodeStart(HtmlNodeType.Text, 0);
            while (this.cuurrentindex < this.Text.Length)
            {
                this.currentchar = this.Text[this.cuurrentindex];

                // presumably advances cuurrentindex past currentchar: the code below treats
                // cuurrentindex - 1 as the position of currentchar — confirm against IncrementPosition
                this.IncrementPosition();

                // In most states NewCheck() (defined elsewhere) gets the first look at the character;
                // when it returns true the loop moves straight on to the next character.
                switch (this.state)
                {
                    case ParseState.Text:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        break;

                    case ParseState.WhichTag:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        // '/' gets a different flag and name start than any other character
                        if (this.currentchar == '/')
                        {
                            this.PushNodeNameStart(false, this.cuurrentindex);
                        }
                        else
                        {
                            this.PushNodeNameStart(true, this.cuurrentindex - 1);
                            this.DecrementPosition();
                        }

                        this.state = ParseState.Tag;
                        break;

                    case ParseState.Tag:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        // whitespace ends the tag name; attributes may follow
                        if (IsWhiteSpace(this.currentchar))
                        {
                            this.PushNodeNameEnd(this.cuurrentindex - 1);

                            // PushNodeNameEnd may have changed the state; if so, leave it alone
                            if (this.state != ParseState.Tag)
                            {
                                continue;
                            }

                            this.state = ParseState.BetweenAttributes;
                            continue;
                        }

                        // '/' ends the tag name and may start an empty-tag terminator
                        if (this.currentchar == '/')
                        {
                            this.PushNodeNameEnd(this.cuurrentindex - 1);
                            if (this.state != ParseState.Tag)
                            {
                                continue;
                            }

                            this.state = ParseState.EmptyTag;
                            continue;
                        }

                        // '>' ends both the tag name and the tag itself
                        if (this.currentchar == '>')
                        {
                            this.PushNodeNameEnd(this.cuurrentindex - 1);
                            if (this.state != ParseState.Tag)
                            {
                                continue;
                            }

                            if (!this.PushNodeEnd(this.cuurrentindex, false))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            if (this.state != ParseState.Tag)
                            {
                                continue;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                        }

                        break;

                    case ParseState.BetweenAttributes:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        // skip whitespace between attributes
                        if (IsWhiteSpace(this.currentchar))
                        {
                            continue;
                        }

                        if ((this.currentchar == '/') || (this.currentchar == '?'))
                        {
                            this.state = ParseState.EmptyTag;
                            continue;
                        }

                        if (this.currentchar == '>')
                        {
                            if (!this.PushNodeEnd(this.cuurrentindex, false))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            if (this.state != ParseState.BetweenAttributes)
                            {
                                continue;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                            continue;
                        }

                        // any other character starts an attribute name
                        this.PushAttributeNameStart(this.cuurrentindex - 1);
                        this.state = ParseState.AttributeName;
                        break;

                    case ParseState.EmptyTag:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        if (this.currentchar == '>')
                        {
                            // close = true: the tag was terminated as an empty tag
                            if (!this.PushNodeEnd(this.cuurrentindex, true))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            if (this.state != ParseState.EmptyTag)
                            {
                                continue;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                            continue;
                        }

                        // not followed by '>': go back to reading attributes
                        this.state = ParseState.BetweenAttributes;
                        break;

                    case ParseState.AttributeName:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        // whitespace ends the name; an '=' may still follow
                        if (IsWhiteSpace(this.currentchar))
                        {
                            this.PushAttributeNameEnd(this.cuurrentindex - 1);
                            this.state = ParseState.AttributeBeforeEquals;
                            continue;
                        }

                        if (this.currentchar == '=')
                        {
                            this.PushAttributeNameEnd(this.cuurrentindex - 1);
                            this.state = ParseState.AttributeAfterEquals;
                            continue;
                        }

                        // '>' ends the attribute name and the tag
                        if (this.currentchar == '>')
                        {
                            this.PushAttributeNameEnd(this.cuurrentindex - 1);
                            if (!this.PushNodeEnd(this.cuurrentindex, false))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            if (this.state != ParseState.AttributeName)
                            {
                                continue;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                            continue;
                        }

                        break;

                    case ParseState.AttributeBeforeEquals:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        if (IsWhiteSpace(this.currentchar))
                        {
                            continue;
                        }

                        if (this.currentchar == '>')
                        {
                            if (!this.PushNodeEnd(this.cuurrentindex, false))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            if (this.state != ParseState.AttributeBeforeEquals)
                            {
                                continue;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                            continue;
                        }

                        if (this.currentchar == '=')
                        {
                            this.state = ParseState.AttributeAfterEquals;
                            continue;
                        }

                        // no equals, no whitespace, it's a new attribute starting;
                        // step back so the character is handled again in BetweenAttributes
                        this.state = ParseState.BetweenAttributes;
                        this.DecrementPosition();
                        break;

                    case ParseState.AttributeAfterEquals:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        if (IsWhiteSpace(this.currentchar))
                        {
                            continue;
                        }

                        // a quote opens a quoted value; remember which quote must close it
                        if ((this.currentchar == '\'') || (this.currentchar == '"'))
                        {
                            this.state = ParseState.QuotedAttributeValue;
                            this.PushAttributeValueStart(this.cuurrentindex, this.currentchar);
                            lastquote = this.currentchar;
                            continue;
                        }

                        if (this.currentchar == '>')
                        {
                            if (!this.PushNodeEnd(this.cuurrentindex, false))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            if (this.state != ParseState.AttributeAfterEquals)
                            {
                                continue;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                            continue;
                        }

                        // anything else starts an unquoted value
                        this.PushAttributeValueStart(this.cuurrentindex - 1);
                        this.state = ParseState.AttributeValue;
                        break;

                    case ParseState.AttributeValue:
                        if (this.NewCheck())
                        {
                            continue;
                        }

                        // an unquoted value ends at whitespace ...
                        if (IsWhiteSpace(this.currentchar))
                        {
                            this.PushAttributeValueEnd(this.cuurrentindex - 1);
                            this.state = ParseState.BetweenAttributes;
                            continue;
                        }

                        // ... or at the end of the tag
                        if (this.currentchar == '>')
                        {
                            this.PushAttributeValueEnd(this.cuurrentindex - 1);
                            if (!this.PushNodeEnd(this.cuurrentindex, false))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            if (this.state != ParseState.AttributeValue)
                            {
                                continue;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                            continue;
                        }

                        break;

                    case ParseState.QuotedAttributeValue:
                        // the value ends at the same quote character that opened it
                        if (this.currentchar == lastquote)
                        {
                            this.PushAttributeValueEnd(this.cuurrentindex - 1);
                            this.state = ParseState.BetweenAttributes;
                            continue;
                        }

                        // "<%" inside a quoted value switches to server-side code; the state to
                        // return to is saved in oldstate
                        if (this.currentchar == '<')
                        {
                            if (this.cuurrentindex < this.Text.Length)
                            {
                                if (this.Text[this.cuurrentindex] == '%')
                                {
                                    this.oldstate = this.state;
                                    this.state = ParseState.ServerSideCode;
                                    continue;
                                }
                            }
                        }

                        break;

                    case ParseState.Comment:
                        if (this.currentchar == '>')
                        {
                            // when fullcomment is set, '>' only ends the comment if the two
                            // characters checked here are both '-'
                            if (this.fullcomment)
                            {
                                if ((this.Text[this.cuurrentindex - 2] != '-') || (this.Text[this.cuurrentindex - 3] != '-'))
                                {
                                    continue;
                                }
                            }

                            if (!this.PushNodeEnd(this.cuurrentindex, false))
                            {
                                // stop parsing
                                this.cuurrentindex = this.Text.Length;
                                break;
                            }

                            this.state = ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this.cuurrentindex);
                            continue;
                        }

                        break;

                    case ParseState.ServerSideCode:
                        // a '%' immediately followed by '>' ends the server-side block
                        if (this.currentchar == '%')
                        {
                            if (this.cuurrentindex < this.Text.Length)
                            {
                                if (this.Text[this.cuurrentindex] == '>')
                                {
                                    // resume according to the state saved when the block started
                                    switch (this.oldstate)
                                    {
                                        case ParseState.AttributeAfterEquals:
                                            this.state = ParseState.AttributeValue;
                                            break;

                                        case ParseState.BetweenAttributes:
                                            this.PushAttributeNameEnd(this.cuurrentindex + 1);
                                            this.state = ParseState.BetweenAttributes;
                                            break;

                                        default:
                                            this.state = this.oldstate;
                                            break;
                                    }

                                    this.IncrementPosition();
                                }
                            }
                        }

                        break;

                    case ParseState.PcData:

                        // look for </tag + 1 char

                        // check buffer end
                        if ((this.currentnode.NameLength + 3) <= (this.Text.Length - (this.cuurrentindex - 1)))
                        {
                            // does the text at the current character read "</" + node name (ignoring case)?
                            if (string.Compare(
                                this.Text.Substring(this.cuurrentindex - 1, this.currentnode.NameLength + 2),
                                "</" + this.currentnode.NodeName,
                                StringComparison.OrdinalIgnoreCase) == 0)
                            {
                                // the character after the name must be '>' or whitespace for a real end tag
                                int c = this.Text[this.cuurrentindex - 1 + 2 + this.currentnode.NodeName.Length];
                                if ((c == '>') || IsWhiteSpace(c))
                                {
                                    // add the script as a text node
                                    HtmlNode script = this.CreateNode(
                                        HtmlNodeType.Text,
                                        this.currentnode.OuterStartIndex + this.currentnode.OuterLength);
                                    script.OuterLength = this.cuurrentindex - 1 - script.OuterStartIndex;
                                    this.currentnode.AppendChild(script);

                                    // start the end-tag node; its name begins after "</"
                                    this.PushNodeStart(HtmlNodeType.Element, this.cuurrentindex - 1);
                                    this.PushNodeNameStart(false, this.cuurrentindex - 1 + 2);
                                    this.state = ParseState.Tag;
                                    this.IncrementPosition();
                                }
                            }
                        }

                        break;
                }
            }

            // finish the current work: end a name that is still open, then end the node itself
            if (this.currentnode.NameStartIndex > 0)
            {
                this.PushNodeNameEnd(this.cuurrentindex);
            }

            this.PushNodeEnd(this.cuurrentindex, false);

            // we don't need this anymore
            this.LastNodes.Clear();
        }
Пример #12
0
        /// <summary>
        /// Determines whether any of the named resetter nodes is currently open and was started at or
        /// after the given node.
        /// </summary>
        /// <param name="node">The node whose stream position the resetters are compared against.</param>
        /// <param name="names">The resetter node names to look for. May be <c>null</c>.</param>
        /// <returns>
        /// true if at least one name maps to a last-seen node that is not closed and whose stream
        /// position is not before that of <paramref name="node"/>; otherwise false.
        /// </returns>
        private bool FindResetterNodes(HtmlNode node, IEnumerable<string> names)
        {
            if (names == null)
            {
                return false;
            }

            foreach (string resetterName in names)
            {
                // A null name cannot be a dictionary key; skip it.
                if (resetterName == null)
                {
                    continue;
                }

                // Look the resetter up by its own name. The previous lambda ignored its parameter,
                // so every iteration checked the same node and the names were never consulted.
                HtmlNode resetter = this.LastNodes.GetDictionaryValueOrNull(resetterName);

                if ((resetter != null)
                    && !resetter.Closed
                    && (resetter.StreamPosition >= node.StreamPosition))
                {
                    return true;
                }
            }

            return false;
        }
Пример #13
0
        /// <summary>
        /// Gets the last recorded node with the current node's name when it is still open and does
        /// not start before <paramref name="node"/>.
        /// </summary>
        /// <param name="node">The node whose stream position is used as the lower bound.</param>
        /// <returns>
        /// The matching node, or <c>null</c> when there is none, it is closed, or it starts before
        /// <paramref name="node"/>.
        /// </returns>
        private HtmlNode FindResetterNode(HtmlNode node)
        {
            HtmlNode candidate = this.LastNodes.GetDictionaryValueOrNull(this.currentnode.NodeName);

            bool missingOrClosed = (candidate == null) || candidate.Closed;
            if (missingOrClosed)
            {
                return null;
            }

            if (candidate.StreamPosition >= node.StreamPosition)
            {
                return candidate;
            }

            return null;
        }
Пример #14
0
        /// <summary>
        /// Moves <c>lastparentnode</c> up through <c>ParentNode</c> links until it refers to a
        /// node that is not closed; falls back to the document node when the chain ends.
        /// </summary>
        internal void UpdateLastParentNode()
        {
            while (true)
            {
                // The first pass dereferences lastparentnode without a null check.
                if (this.lastparentnode.Closed)
                {
                    this.lastparentnode = this.lastparentnode.ParentNode;
                }

                bool ranOutOfParents = this.lastparentnode == null;
                if (ranOutOfParents || !this.lastparentnode.Closed)
                {
                    break;
                }
            }

            // walked past the top of the tree: use the document node instead
            if (this.lastparentnode == null)
            {
                this.lastparentnode = this.documentnode;
            }
        }
Пример #15
0
        /// <summary>
        /// Positions the navigator on the node that the document returns for the given id.
        /// </summary>
        /// <param name="id">
        /// The id value passed to the document's <c>GetElementbyId</c> lookup.
        /// </param>
        /// <returns>
        /// <c>true</c> if a node was found and became the current node; <c>false</c> if the lookup
        /// returned <c>null</c>, in which case the current node is left unchanged.
        /// </returns>
        public override bool MoveToId(string id)
        {
            HtmlNode target = this.htmlDocument.GetElementbyId(id);
            bool found = target != null;

            if (found)
            {
                this.currentnode = target;
            }

            return found;
        }
Пример #16
0
        /// <summary>
        /// Inspects a node for a <c>meta http-equiv="content-type"</c> declaration and, when one
        /// carries a charset, records it as the declared encoding.
        /// </summary>
        /// <remarks>
        /// Does nothing unless <c>OptionReadEncoding</c> is set. When the document is only
        /// detecting the encoding, an <c>EncodingFoundException</c> carrying the declared
        /// encoding is thrown as soon as a charset has been read. Otherwise a
        /// <c>CharsetMismatch</c> parse error is added when the declared and stream encodings
        /// have different Windows code pages.
        /// </remarks>
        /// <param name="node">The node to inspect.</param>
        private void ReadDocumentEncoding(HtmlNode node)
        {
            if (!this.OptionReadEncoding)
            {
                return;
            }

            // Expected markup:
            // <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" />
            // This runs when a child is appended, i.e. at node end, so attributes are already populated.

            // The length test comes first as a quick check that avoids a string allocation;
            // all node names are lowercase, hence the exact comparison.
            if ((node.NameLength != 4) || (node.NodeName != "meta"))
            {
                return;
            }

            HtmlAttribute httpEquiv = node.Attributes["http-equiv"];
            if ((httpEquiv == null)
                || (string.Compare(httpEquiv.Value, "content-type", StringComparison.OrdinalIgnoreCase) != 0))
            {
                return;
            }

            HtmlAttribute contentAttribute = node.Attributes["content"];
            if (contentAttribute == null)
            {
                return;
            }

            string charset = NameValuePairList.GetNameValuePairsValue(contentAttribute.Value, "charset");
            if (string.IsNullOrEmpty(charset))
            {
                return;
            }

            // Works around the bug described at: http://htmlagilitypack.codeplex.com/WorkItem/View.aspx?WorkItemId=25273
            if (string.Equals(charset, "utf8", StringComparison.OrdinalIgnoreCase))
            {
                charset = "utf-8";
            }

            try
            {
                this.DeclaredEncoding = Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // charset name not accepted: treat as "nothing declared"
                this.DeclaredEncoding = null;
            }

            if (this.onlyDetectEncoding)
            {
                throw new EncodingFoundException(this.DeclaredEncoding);
            }

            // A mismatch can only be reported when both encodings are known.
            if ((this.StreamEncoding == null) || (this.DeclaredEncoding == null))
            {
                return;
            }

            if (this.DeclaredEncoding.WindowsCodePage == this.StreamEncoding.WindowsCodePage)
            {
                return;
            }

            this.AddError(
                HtmlParseErrorCode.CharsetMismatch,
                this.line,
                this.lineposition,
                this.cuurrentindex,
                node.OuterHtml,
                "Encoding mismatch between StreamEncoding: " + this.StreamEncoding.WebName + " and DeclaredEncoding: " + this.DeclaredEncoding.WebName);
        }
Пример #17
0
        /// <summary>
        /// Moves to the next sibling of the current node.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the current node has a next sibling and the navigator moved to it;
        /// <c>false</c> otherwise, in which case the current node is left unchanged.
        /// </returns>
        public override bool MoveToNext()
        {
            HtmlNode sibling = this.currentnode.NextSibling;
            if (sibling != null)
            {
                this.currentnode = sibling;
                return true;
            }

            return false;
        }
Пример #18
0
        /// <summary>
        /// Detects the encoding declared by HTML text supplied through a <see cref="TextReader"/>.
        /// </summary>
        /// <remarks>
        /// The whole reader is consumed and parsed with the "only detect encoding" flag set;
        /// the result is taken from the <c>EncodingFoundException</c> raised during parsing.
        /// </remarks>
        /// <param name="reader">
        /// The reader that supplies the HTML. May not be <c>null</c>.
        /// </param>
        /// <returns>
        /// The encoding carried by the exception raised while parsing, or <c>null</c> when
        /// parsing finishes without raising one.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
        public Encoding DetectEncoding(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            this.onlyDetectEncoding = true;

            // the bookkeeping tables are only allocated when the matching option is enabled
            if (this.OptionCheckSyntax)
            {
                this.Openednodes = new Dictionary<int, HtmlNode>();
            }
            else
            {
                this.Openednodes = null;
            }

            if (this.OptionUseIdAttribute)
            {
                this.Nodesid = new Dictionary<string, HtmlNode>();
            }
            else
            {
                this.Nodesid = null;
            }

            // only a StreamReader can report the encoding it is reading with
            StreamReader streamReader = reader as StreamReader;
            if (streamReader == null)
            {
                this.StreamEncoding = null;
            }
            else
            {
                this.StreamEncoding = streamReader.CurrentEncoding;
            }

            this.DeclaredEncoding = null;

            this.Text = reader.ReadToEnd();
            this.documentnode = this.CreateNode(HtmlNodeType.Document, 0);

            // Almost a hack: the exception is used as an early exit so the
            // regular parsing code does not need to be modified.
            Encoding detected = null;
            try
            {
                this.Parse();
            }
            catch (EncodingFoundException found)
            {
                detected = found.Encoding;
            }

            return detected;
        }
Пример #19
0
        /// <summary>
        /// Moves to the previous sibling of the current node.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the current node has a previous sibling and the navigator moved to it;
        /// <c>false</c> otherwise, in which case the current node is left unchanged.
        /// </returns>
        public override bool MoveToPrevious()
        {
            HtmlNode sibling = this.currentnode.PreviousSibling;
            if (sibling != null)
            {
                this.currentnode = sibling;
                return true;
            }

            return false;
        }
Пример #20
0
        /// <summary>
        /// Initializes a new instance of the <see cref="HtmlNodeNavigator"/> class positioned
        /// on a given node of a document.
        /// </summary>
        /// <param name="doc">The document the navigator operates on.</param>
        /// <param name="currentNode">The node to start on; must be owned by <paramref name="doc"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="currentNode"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// The owner document of <paramref name="currentNode"/> is not <paramref name="doc"/>.
        /// </exception>
        internal HtmlNodeNavigator(HtmlDocument doc, HtmlNode currentNode)
        {
            if (currentNode == null)
            {
                throw new ArgumentNullException("currentNode");
            }

            bool ownedByDocument = currentNode.OwnerDocument == doc;
            if (!ownedByDocument)
            {
                throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
            }

            this.htmlDocument = doc;

            // Reset() puts the navigator in its initial state; the position is then overridden.
            this.Reset();
            this.currentnode = currentNode;
        }
Пример #21
0
 /// <summary>
 /// Returns the navigator to its initial state: positioned on the document node,
 /// with the attribute index set to -1.
 /// </summary>
 private void Reset()
 {
     HtmlNode root = this.htmlDocument.DocumentNode;
     this.currentnode = root;

     this.attindex = -1;
 }
Пример #22
0
        /// <summary>
        /// Initializes a new instance of the <see cref="HtmlNodeNavigator"/> class as a copy
        /// of another navigator.
        /// </summary>
        /// <param name="nav">
        /// The navigator whose document, current node, attribute index and name table are copied.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="nav"/> is <c>null</c>.</exception>
        private HtmlNodeNavigator(HtmlNodeNavigator nav)
        {
            if (nav == null)
            {
                throw new ArgumentNullException("nav");
            }

            // same document, same position
            this.htmlDocument = nav.htmlDocument;
            this.currentnode = nav.currentnode;
            this.attindex = nav.attindex;

            // REVIEW: the name table reference is shared rather than copied - should we do this?
            this.htmlNameTable = nav.htmlNameTable;
        }
Пример #23
0
        /// <summary>
        /// Removes the specified child node, optionally re-attaching its children to this node.
        /// </summary>
        /// <param name="oldChild">
        /// The node being removed. May not be <c>null</c>.
        /// </param>
        /// <param name="keepGrandChildren">
        /// <c>true</c> to insert the children of <paramref name="oldChild"/> into this node before
        /// removing it; <c>false</c> to remove it together with its children.
        /// </param>
        /// <returns>
        /// The node removed.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is <c>null</c>.</exception>
        public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
        {
            if (oldChild == null)
            {
                throw new ArgumentNullException("oldChild");
            }

            if ((oldChild.childnodes != null) && keepGrandChildren)
            {
                // Insertion anchor; starts at the sibling that precedes the removed child.
                HtmlNode anchor = oldChild.PreviousSibling;

                // Reroute grandchildren to ourselves. The anchor advances after every
                // insertion so the grandchildren keep their original relative order;
                // inserting all of them after the same sibling would reverse it.
                foreach (HtmlNode grandchild in oldChild.childnodes)
                {
                    this.InsertAfter(grandchild, anchor);
                    anchor = grandchild;
                }
            }

            this.RemoveChild(oldChild);
            this.OuterChanged = true;
            this.InnerChanged = true;
            return oldChild;
        }
Пример #24
0
        /// <summary>
        /// Moves to the same position as the specified navigator.
        /// </summary>
        /// <param name="other">
        /// The navigator positioned on the node to move to.
        /// </param>
        /// <returns>
        /// <c>true</c> if <paramref name="other"/> is an <see cref="HtmlNodeNavigator"/> over the same
        /// document and its position was copied; otherwise <c>false</c>, in which case this
        /// navigator is left unchanged.
        /// </returns>
        public override bool MoveTo(XPathNavigator other)
        {
            var source = other as HtmlNodeNavigator;

            // Foreign navigator types and navigators over another document are not handled.
            if (source == null || source.htmlDocument != this.htmlDocument)
            {
                return false;
            }

            this.currentnode = source.currentnode;
            this.attindex = source.attindex;
            return true;
        }
Пример #25
0
        /// <summary>
        /// Writes the attributes of a node to an <see cref="XmlWriter"/>.
        /// </summary>
        /// <param name="writer">
        /// The writer that receives one attribute string per attribute.
        /// </param>
        /// <param name="node">
        /// The node whose attributes are written; nothing is written when it has none.
        /// </param>
        internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
        {
            if (node.HasAttributes)
            {
                // Hashitems is enumerated to make sure each attribute is written only once.
                foreach (HtmlAttribute attribute in node.Attributes.Hashitems.Values)
                {
                    writer.WriteAttributeString(attribute.XmlName, attribute.Value);
                }
            }
        }
Пример #26
0
        /// <summary>
        /// Moves to the first child of the current node's parent, i.e. the first sibling.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the navigator was positioned on the parent's first child; <c>false</c> if
        /// the current node has no parent or the parent reports no first child, in which case
        /// the current node is left unchanged.
        /// </returns>
        public override bool MoveToFirst()
        {
            HtmlNode parent = this.currentnode.ParentNode;
            if (parent == null)
            {
                return false;
            }

            HtmlNode firstSibling = parent.FirstChild;
            if (firstSibling == null)
            {
                return false;
            }

            this.currentnode = firstSibling;
            return true;
        }
Пример #27
0
        /// <summary>
        /// Adds the specified node to the end of this node's list of children.
        /// </summary>
        /// <param name="newChild">
        /// The node to add. May not be <c>null</c>.
        /// </param>
        /// <returns>
        /// The node added.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
        public HtmlNode AppendChild(HtmlNode newChild)
        {
            if (newChild == null)
            {
                throw new ArgumentNullException("newChild");
            }

            this.ChildNodes.Append(newChild);

            // register the child with the owner document under its id
            string childId = newChild.GetId();
            this.OwnerDocument.SetIdForNode(newChild, childId);

            // flag both the outer and the inner content of this node as changed
            this.OuterChanged = true;
            this.InnerChanged = true;

            return newChild;
        }
Пример #28
0
        /// <summary>
        /// Moves to the first child of the current node.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the current node has child nodes and the navigator moved to the first one;
        /// otherwise <c>false</c>, in which case the current node is left unchanged.
        /// </returns>
        public override bool MoveToFirstChild()
        {
            if (this.currentnode.HasChildNodes)
            {
                HtmlNode firstChild = this.currentnode.ChildNodes[0];
                this.currentnode = firstChild;
                return true;
            }

            return false;
        }
Пример #29
0
        /// <summary>
        /// Clones a node and entitizes the clone: its attributes, its child nodes and, for a
        /// childless text node, its text. The node passed in is not modified here; all work
        /// is done on the deep clone.
        /// </summary>
        /// <param name="node">
        /// The node to entitize. May not be <c>null</c>.
        /// </param>
        /// <returns>
        /// The entitized clone.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is <c>null</c>.</exception>
        public static HtmlNode Entitize(HtmlNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            HtmlNode clone = node.CloneNode(true);

            if (clone.HasAttributes)
            {
                Entitize(clone.Attributes);
            }

            if (clone.HasChildNodes)
            {
                Entitize(clone.ChildNodes);
            }
            else if (clone.NodeType == HtmlNodeType.Text)
            {
                // a leaf text node carries the text itself
                var textNode = (HtmlTextNode)clone;
                textNode.Text = Entitize(textNode.Text, true, true);
            }

            return clone;
        }
Пример #30
0
        /// <summary>
        /// Initializes a new instance of the <see cref="HtmlDocument"/> class with an empty
        /// document node and the default option values set below.
        /// </summary>
        public HtmlDocument()
        {
            // default cap assigned to OptionExtractErrorSourceTextMaxLength
            const int DefaultErrorSourceTextMaxLength = 100;

            // The document node is created before any option is assigned.
            this.documentnode = this.CreateNode(HtmlNodeType.Document, 0);

            // option defaults
            this.OptionDefaultStreamEncoding = Encoding.Default;
            this.OptionExtractErrorSourceTextMaxLength = DefaultErrorSourceTextMaxLength;
            this.OptionCheckSyntax = true;
            this.OptionReadEncoding = true;
            this.OptionUseIdAttribute = true;

            // lookup table of nodes keyed by string
            this.LastNodes = new Dictionary<string, HtmlNode>();
        }