Exemplo n.º 1
0
        /// <summary>
        /// Tokenizes the supplied markup and turns the token stream into a node collection.
        /// </summary>
        /// <remarks>
        /// The markup is first run through PreprocessScript (for "script", then "style"),
        /// RemoveComments and RemoveSGMLComments, in that order, and only then tokenized.
        /// Elements are appended to the result as soon as their opening tag is read; when a
        /// closing tag is matched through FindTagOpenNodeIndex, MoveNodesDown is invoked for
        /// the nodes that follow the opening element (presumably nesting them beneath it;
        /// confirm against MoveNodesDown).
        /// </remarks>
        /// <param name="html">The markup to parse.</param>
        /// <returns>
        /// The collection of parsed nodes. It is returned early, as built so far, when the
        /// token stream ends immediately after a tag-open or closing-tag-open token.
        /// </returns>
        public HtmlNodeCollection Parse(string html)
        {
            HtmlNodeCollection parsed = new HtmlNodeCollection(null);

            html = this.PreprocessScript(html, "script");
            html = this.PreprocessScript(html, "style");
            html = this.RemoveComments(html);
            html = this.RemoveSGMLComments(html);
            StringCollection tokens = this.GetTokens(html);

            // Most recently opened element; cleared after a self-closing tag or any closing tag.
            HtmlElement current = null;
            int pos = 0;

            while (pos < tokens.Count)
            {
                switch (tokens[pos])
                {
                    case "<":
                    {
                        int namePos = pos + 1;
                        if (namePos >= tokens.Count)
                        {
                            // Nothing follows the tag-open token: stop with what we have.
                            return parsed;
                        }

                        string tagName = tokens[namePos];
                        pos = namePos + 1;
                        current = new HtmlElement(tagName);

                        // Read attributes until the tag is closed or the tokens run out.
                        while (pos < tokens.Count && !(tokens[pos] == ">" || tokens[pos] == "/>"))
                        {
                            string attributeName = tokens[pos];
                            pos++;

                            bool hasNext = pos < tokens.Count;
                            if (hasNext && tokens[pos] == "=")
                            {
                                int valuePos = pos + 1;
                                string rawValue = null;
                                if (valuePos < tokens.Count)
                                {
                                    rawValue = tokens[valuePos];
                                }
                                pos = valuePos + 1;

                                HtmlAttribute withValue = new HtmlAttribute(attributeName, HtmlEncoder.DecodeValue(rawValue));
                                current.Attributes.Add(withValue);
                            }
                            else if (hasNext)
                            {
                                // Attribute without a value. A name that is the very last
                                // token is not added at all.
                                HtmlAttribute withoutValue = new HtmlAttribute(attributeName, null);
                                current.Attributes.Add(withoutValue);
                            }
                        }

                        parsed.Add(current);

                        if (pos < tokens.Count && tokens[pos] == "/>")
                        {
                            // Self-closing tag: mark it terminated and forget it as the open element.
                            current.IsTerminated = true;
                            pos++;
                            current = null;
                        }
                        else if (pos < tokens.Count && tokens[pos] == ">")
                        {
                            pos++;
                        }
                        break;
                    }

                    case ">":
                    {
                        // Stray tag-close token outside of a tag: skip it.
                        pos++;
                        break;
                    }

                    case "</":
                    {
                        int namePos = pos + 1;
                        if (namePos >= tokens.Count)
                        {
                            return parsed;
                        }

                        string closingName = tokens[namePos];
                        pos = namePos + 1;

                        int openIndex = this.FindTagOpenNodeIndex(parsed, closingName);
                        if (openIndex != -1)
                        {
                            this.MoveNodesDown(ref parsed, openIndex + 1, (HtmlElement)parsed[openIndex]);
                        }

                        // Ignore anything else inside the closing tag, then step past its ">".
                        while (pos < tokens.Count && tokens[pos] != ">")
                        {
                            pos++;
                        }
                        if (pos < tokens.Count && tokens[pos] == ">")
                        {
                            pos++;
                        }

                        current = null;
                        break;
                    }

                    default:
                    {
                        // Any other token is text content.
                        string raw = tokens[pos];
                        if (this.mRemoveEmptyElementText)
                        {
                            raw = this.RemoveWhitespace(raw);
                        }

                        string text = HtmlParser.DecodeScript(raw);
                        if (!this.mRemoveEmptyElementText || text.Length != 0)
                        {
                            // Entity decoding is skipped only when the open element asks for no escaping.
                            if (current == null || !current.NoEscaping)
                            {
                                text = HtmlEncoder.DecodeValue(text);
                            }
                            HtmlText textNode = new HtmlText(text);
                            parsed.Add(textNode);
                        }

                        pos++;
                        break;
                    }
                }
            }

            return parsed;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Parses markup into a node collection by walking the token list produced by GetTokens.
        /// </summary>
        /// <remarks>
        /// Pre-processing happens before tokenizing, in this order: PreprocessScript for
        /// "script", PreprocessScript for "style", RemoveComments, RemoveSGMLComments.
        /// Each opening tag adds an element to the result straight away; a closing tag looks
        /// up its opening element with FindTagOpenNodeIndex and, when found, calls
        /// MoveNodesDown for the nodes after it (presumably to nest them under that element;
        /// verify against MoveNodesDown).
        /// </remarks>
        /// <param name="html">The markup to parse.</param>
        /// <returns>
        /// The parsed nodes. If the tokens end directly after a tag-open or closing-tag-open
        /// token, the nodes collected up to that point are returned.
        /// </returns>
        public HtmlNodeCollection Parse(string html)
        {
            HtmlNodeCollection nodes = new HtmlNodeCollection(null);

            html = this.PreprocessScript(html, "script");
            html = this.PreprocessScript(html, "style");
            html = this.RemoveComments(html);
            html = this.RemoveSGMLComments(html);
            StringCollection tokens = this.GetTokens(html);

            // Element whose opening tag was read last; reset to null by "/>" and by closing tags.
            HtmlElement openElement = null;
            int cursor = 0;

            while (cursor < tokens.Count)
            {
                string token = tokens[cursor];

                // Stray ">" outside of any tag: skip it.
                if (token == ">")
                {
                    cursor++;
                    continue;
                }

                // Opening tag: name, attributes, then ">" or "/>".
                if (token == "<")
                {
                    cursor++;
                    if (cursor >= tokens.Count)
                    {
                        return nodes;
                    }

                    string tagName = tokens[cursor];
                    cursor++;
                    openElement = new HtmlElement(tagName);

                    while (cursor < tokens.Count)
                    {
                        string attributeName = tokens[cursor];
                        if (attributeName == ">" || attributeName == "/>")
                        {
                            break;
                        }

                        cursor++;
                        if (cursor >= tokens.Count)
                        {
                            // The name was the final token; it is discarded without being added.
                            break;
                        }

                        // A valueless attribute keeps a null value and is never passed to DecodeValue.
                        string attributeValue = null;
                        if (tokens[cursor] == "=")
                        {
                            cursor++;
                            string rawValue = null;
                            if (cursor < tokens.Count)
                            {
                                rawValue = tokens[cursor];
                            }
                            cursor++;
                            attributeValue = HtmlEncoder.DecodeValue(rawValue);
                        }

                        HtmlAttribute attribute = new HtmlAttribute(attributeName, attributeValue);
                        openElement.Attributes.Add(attribute);
                    }

                    nodes.Add(openElement);

                    // If tokens remain, the loop above stopped on ">" or "/>"; consume it.
                    if (cursor < tokens.Count)
                    {
                        if (tokens[cursor] == "/>")
                        {
                            openElement.IsTerminated = true;
                            openElement = null;
                        }
                        cursor++;
                    }
                    continue;
                }

                // Closing tag: locate the opening element and hand the trailing nodes to MoveNodesDown.
                if (token == "</")
                {
                    cursor++;
                    if (cursor >= tokens.Count)
                    {
                        return nodes;
                    }

                    string closingName = tokens[cursor];
                    cursor++;

                    int openerIndex = this.FindTagOpenNodeIndex(nodes, closingName);
                    if (openerIndex != -1)
                    {
                        HtmlElement opener = (HtmlElement)nodes[openerIndex];
                        this.MoveNodesDown(ref nodes, openerIndex + 1, opener);
                    }

                    // Skip whatever else sits inside the closing tag.
                    while (cursor < tokens.Count && tokens[cursor] != ">")
                    {
                        cursor++;
                    }
                    // If we are still in range, the current token is the ">" that ended the skip.
                    if (cursor < tokens.Count)
                    {
                        cursor++;
                    }

                    openElement = null;
                    continue;
                }

                // Everything else is text content.
                string content = this.mRemoveEmptyElementText ? this.RemoveWhitespace(token) : token;
                content = HtmlParser.DecodeScript(content);

                bool keepText = !this.mRemoveEmptyElementText || content.Length != 0;
                if (keepText)
                {
                    bool decodeEntities = openElement == null || !openElement.NoEscaping;
                    if (decodeEntities)
                    {
                        content = HtmlEncoder.DecodeValue(content);
                    }
                    HtmlText textNode = new HtmlText(content);
                    nodes.Add(textNode);
                }
                cursor++;
            }

            return nodes;
        }