Пример #1
0
            /// <summary>
            /// Runs the character-level state machine: reads characters with <c>NextChar()</c> and
            /// dispatches on <c>m_ParseState</c> until the loop is exited.
            /// </summary>
            /// <remarks>
            /// The loop exits once <c>EndNode</c> has been called for the current node. It also exits at
            /// a few intermediate points: right after a Text or Comment node has been started, and when
            /// whitespace terminates a tag or identifier name (the state is then
            /// <c>BetweenAttributes</c>). A later call continues from the state left in
            /// <c>m_ParseState</c>.
            /// </remarks>
            [SuppressMessage("Microsoft.Maintainability", "CA1502")] // Most of the complexity is due to simple switch statements.  Not sure if it would truly be less complex if it was split into different methods...
            private void Parse()
            {
                // When resuming inside a tag, remember where the attribute section starts.
                if (m_ParseState == HtmlParseState.BetweenAttributes)
                {
                    m_AttributeLineNumber = m_LineNumber;
                    m_AttributeLinePosition = m_LinePosition;
                }

                // keep track of the previous character to find empty element nodes.
                char? previousChar = null;
                bool setPreviousChar = false; // only set it after the first loop
                char currentChar = '\0'; // only setting this here to shut up the compiler.
                while (true)
                {
                    // All node types end at the '>' character with the exception of the Text node.
                    // So, for the Text parse state, peek at the next character rather than read it.
                    // If it is the '<' character, end the node.
                    // The peeked value is compared as an int so that no narrowing cast is needed.
                    if (m_ParseState == HtmlParseState.Text && m_Reader.Peek() == '<')
                    {
                        EndNode(previousChar);
                        break; // break out of the while loop - done parsing the Text node
                    }

                    if (setPreviousChar)
                    {
                        previousChar = currentChar;
                    }
                    currentChar = NextChar();
                    setPreviousChar = true;

                    // Inside the switch, "continue" keeps reading characters, while "break" leaves the
                    // switch and then hits the unconditional "break" below, which ends the loop.
                    switch (m_ParseState)
                    {
                        case HtmlParseState.AttributeAfterEquals:
                            if (currentChar == '>')
                            {
                                AddAttribute();
                                EndNode(previousChar);
                                break;
                            }
                            else if (!Char.IsWhiteSpace(currentChar))
                            {
                                if (currentChar == '\'' || currentChar == '"')
                                {
                                    // Remember which quote opened the value so the same one closes it.
                                    m_QuoteChar = currentChar;
                                    m_ParseState = HtmlParseState.QuotedAttributeValue;
                                    // continue parsing
                                }
                                else // non-quoted attribute value
                                {
                                    AppendToAttributeValue(currentChar);
                                    m_ParseState = HtmlParseState.AttributeValue;
                                    // continue parsing
                                }
                            }
                            continue;
                        case HtmlParseState.AttributeBeforeEquals:
                            if (!Char.IsWhiteSpace(currentChar))
                            {
                                if (currentChar == '>')
                                {
                                    AddAttribute();
                                    EndNode(previousChar);
                                    break;
                                }
                                else if (currentChar == '=')
                                {
                                    m_ParseState = HtmlParseState.AttributeAfterEquals;
                                    // continue parsing
                                }
                                else // start of a new attribute name
                                {
                                    // The pending attribute had no '=', so store it before starting the next.
                                    AddAttribute();
                                    StartNewAttribute();
                                    AppendToAttributeName(currentChar);
                                    m_ParseState = HtmlParseState.AttributeName;
                                    // continue parsing
                                }
                            }
                            continue;
                        case HtmlParseState.AttributeName:
                            if (Char.IsWhiteSpace(currentChar))
                            {
                                m_ParseState = HtmlParseState.AttributeBeforeEquals;
                                // continue parsing
                            }
                            else if (currentChar == '=')
                            {
                                m_ParseState = HtmlParseState.AttributeAfterEquals;
                                // continue parsing
                            }
                            else if (currentChar == '>')
                            {
                                AddAttribute();
                                EndNode(previousChar);
                                break;
                            }
                            else
                            {
                                AppendToAttributeName(currentChar);
                                // continue parsing
                            }
                            continue;
                        case HtmlParseState.AttributeValue:
                            // Unquoted value: ends at whitespace or at the closing '>'.
                            if (Char.IsWhiteSpace(currentChar))
                            {
                                AddAttribute();
                                m_ParseState = HtmlParseState.BetweenAttributes;
                                // continue parsing
                            }
                            else if (currentChar == '>')
                            {
                                AddAttribute();
                                EndNode(previousChar);
                                break;
                            }
                            else
                            {
                                AppendToAttributeValue(currentChar);
                                // continue parsing
                            }
                            continue;
                        case HtmlParseState.BetweenAttributes:
                            if (!Char.IsWhiteSpace(currentChar))
                            {
                                if (currentChar == '>')
                                {
                                    EndNode(previousChar);
                                    break;
                                }
                                else
                                {
                                    StartNewAttribute();
                                    AppendToAttributeName(currentChar);
                                    m_ParseState = HtmlParseState.AttributeName;
                                    // continue parsing
                                }
                            }
                            continue;
                        case HtmlParseState.Comment:
                            if (currentChar == '-')
                            {
                                char secondChar = NextChar();
                                if (secondChar == '-')
                                {
                                    char thirdChar = NextChar();
                                    int count = 0; // keep track of the number of '-' signs to append to the node value
                                    while (thirdChar == '-')
                                    {
                                        thirdChar = NextChar();
                                        count++;
                                    }
                                    // check if the end of the comment has been reached
                                    if (thirdChar == '>')
                                    {
                                        // Dashes beyond the two that belong to "-->" are part of the value.
                                        while (count-- > 0)
                                        {
                                            AppendToValue('-');
                                        }
                                        EndNode(previousChar);
                                        break;
                                    }
                                    else
                                    {
                                        // Not the end of the comment: every dash that was read (the first
                                        // two plus any extra ones) is part of the value.
                                        count += 2;
                                        while (count-- > 0)
                                        {
                                            AppendToValue('-');
                                        }
                                        AppendToValue(thirdChar);
                                        // continue parsing
                                    }
                                }
                                else
                                {
                                    AppendToValue(currentChar);
                                    AppendToValue(secondChar);
                                    // continue parsing
                                }
                            }
                            else
                            {
                                AppendToValue(currentChar);
                                // continue parsing
                            }
                            continue;
                        case HtmlParseState.Identifier:
                            if (currentChar == '>')
                            {
                                EndNode(previousChar);
                                break;
                            }
                            else if (Char.IsWhiteSpace(currentChar))
                            {
                                // Name is complete; leave the loop with the state set to BetweenAttributes.
                                m_ParseState = HtmlParseState.BetweenAttributes;
                                break;
                            }
                            else
                            {
                                m_Name.Append(currentChar);
                                // continue parsing
                            }
                            continue;
                        case HtmlParseState.QuotedAttributeValue:
                            if (currentChar == m_QuoteChar && currentChar != '\0')
                            {
                                AddAttribute();
                                m_ParseState = HtmlParseState.BetweenAttributes;
                                // continue parsing
                            }
                            else
                            {
                                AppendToAttributeValue(currentChar);
                                // continue parsing
                            }
                            continue;
                        case HtmlParseState.Tag:
                        case HtmlParseState.EndTag:
                            if (currentChar == '>')
                            {
                                EndNode(previousChar);
                                break;
                            }
                            else if (Char.IsWhiteSpace(currentChar))
                            {
                                // Name is complete; leave the loop with the state set to BetweenAttributes.
                                m_ParseState = HtmlParseState.BetweenAttributes;
                                break;
                            }
                            else
                            {
                                m_Name.Append(currentChar);
                                // continue parsing
                            }
                            continue;
                        case HtmlParseState.Text:
                            // This node type ending was checked above as a special case.
                            AppendToValue(currentChar);
                            // continue parsing
                            continue;
                        case HtmlParseState.None:
                            if (currentChar != '<')
                            {
                                // Start a Text node and leave the loop; the state stays Text.
                                m_ParseState = HtmlParseState.Text;
                                StartNode(HtmlNodeType.Text);
                                AppendToValue(currentChar);
                                break;
                            }
                            else
                            {
                                m_ParseState = HtmlParseState.BeginTag;
                                // continue parsing
                            }
                            continue;
                        case HtmlParseState.BeginTag:
                            if (currentChar == '!')
                            {
                                char secondChar = NextChar();
                                if (secondChar == '-')
                                {
                                    char thirdChar = NextChar();
                                    if (thirdChar == '-')
                                    {
                                        // "<!--" opens a comment; leave the loop with the state set to Comment.
                                        m_ParseState = HtmlParseState.Comment;
                                        StartNode(HtmlNodeType.Comment);
                                        break;
                                    }
                                    else if (thirdChar == '>')
                                    {
                                        // "<!->" is a complete identifier node whose value is the single dash.
                                        StartNode(HtmlNodeType.Identifier);
                                        AppendToValue(secondChar);
                                        EndNode(previousChar);
                                        break;
                                    }
                                    else
                                    {
                                        // "<!-" followed by anything else is neither a comment nor a
                                        // complete declaration. Handle it like any other "<!x"
                                        // declaration and keep both characters that were already
                                        // consumed, so that no input is lost and a node is started.
                                        StartNode(HtmlNodeType.Identifier);
                                        m_Name.Append(secondChar);
                                        m_Name.Append(thirdChar);
                                        m_ParseState = HtmlParseState.Identifier;
                                        // continue parsing
                                    }
                                }
                                else if (secondChar == '>')
                                {
                                    StartNode(HtmlNodeType.Identifier);
                                    EndNode(previousChar);
                                    break;
                                }
                                else
                                {
                                    StartNode(HtmlNodeType.Identifier);
                                    m_Name.Append(secondChar);
                                    m_ParseState = HtmlParseState.Identifier;
                                    // continue parsing
                                }
                            }
                            else if (Char.IsLetter(currentChar))
                            {
                                StartNode(HtmlNodeType.Element);
                                m_ParseState = HtmlParseState.Tag;
                                m_Name.Append(currentChar);
                                // continue parsing
                            }
                            else if (currentChar == '/')
                            {
                                StartNode(HtmlNodeType.EndElement);
                                m_ParseState = HtmlParseState.EndTag;
                                // continue parsing
                            }
                            else
                            {
                                // if a number or symbol appears after a '<', IE treats it
                                // as text instead of an element.
                                m_ParseState = HtmlParseState.Text;
                                StartNode(HtmlNodeType.Text);
                                AppendToValue('<');
                                AppendToValue(currentChar);
                                break;
                            }
                            continue;
                    }

                    // Reached only via a "break" inside the switch (or an unhandled state): stop reading.
                    break;
                }
            }
Пример #2
0
 /// <summary>
 /// Completes the node currently being parsed and resets the parse state to <c>None</c>.
 /// </summary>
 /// <param name="previousChar">
 /// The character supplied by the caller as the one preceding the current position; may be <c>null</c>.
 /// </param>
 private void EndNode(char? previousChar)
 {
     // The node is an empty element only when it is an Element node and previousChar is '/'.
     // A null previousChar never equals '/', so the flag is false in that case.
     IsEmptyElement = previousChar == '/' && NodeType == HtmlNodeType.Element;
     m_ParseState = HtmlParseState.None;
 }