Example #1
0
        /// <summary>
        /// Re-arms this iterator so it can be reused for the next sibling tag instead of allocating
        /// a new instance. The tokenizer state and insertion mode go back to their defaults, the
        /// text start marker is moved up to the current position, and the current element is
        /// cleared. Parent is not modified here.
        /// </summary>

        public void Reset()
        {
            this.TokenizerState = TokenizerState.Default;
            this.HtmlStart = this.Pos;      // any later literal text starts from here
            this.InsertionMode = InsertionMode.Default;
            this.Element = null;
        }
Example #2
0
        /// <summary>
        /// Builds a text node from the HTML between HtmlStart (the end of the last tag) and the
        /// current position. When WrapLiterals is set the node is wrapped in a span element. The
        /// result is appended to the parent's element when there is a parent; otherwise it is
        /// handed back to the caller.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory whose Html source the text is read from.
        /// </param>
        /// <param name="literal">
        /// [out] The node that was created (or the span wrapping it); null when there was no text
        /// between HtmlStart and Pos.
        /// </param>
        ///
        /// <returns>
        /// true only when there is no parent, meaning the caller has to deal with the literal
        /// itself; false when there was no text, or when the literal was appended to the parent.
        /// </returns>

        public bool TryGetLiteral(HtmlElementFactory factory, out IDomObject literal)
        {
            // Nothing between the last tag and here: no literal to produce.
            if (HtmlStart >= Pos)
            {
                literal = null;
                return false;
            }

            // The kind of text node depends on the mode the text was read in.
            DomText textNode;

            switch (InsertionMode)
            {
            case InsertionMode.Text:
                // Text mode applies to a single literal only.
                InsertionMode = InsertionMode.Default;
                textNode      = new DomInnerText();
                break;

            case InsertionMode.Invalid:
                textNode = new DomInvalidElement();
                break;

            default:
                textNode = new DomText();
                break;
            }

            literal = textNode;

            string rawText = factory.Html.SubstringBetween(HtmlStart, Pos);
            literal.NodeValue = HtmlData.HtmlDecode(rawText);

            if (WrapLiterals)
            {
                DomElement span = DomElement.Create("span");
                span.AppendChildUnsafe(literal);
                literal = span;
            }

            if (Parent == null)
            {
                // No parent to attach to: the caller receives the literal.
                TokenizerState = TokenizerState.Finished;
                return true;
            }

            ((DomElement)Parent.Element).AppendChildUnsafe(literal);
            Reset();
            return false;
        }
Example #3
0
            /// <summary>
            /// Feeds one character to the processor that is currently active in the sequence. When
            /// that processor reports Success the sequence moves on to the next processor, records
            /// where it starts (relative to the first index), resets it, and offers it the same
            /// character.
            /// </summary>
            /// <param name="c">The character being processed.</param>
            /// <param name="fullExpression">The whole expression; passed through to the processors.</param>
            /// <param name="currentIndex">Index of <paramref name="c"/>; passed through to the processors.</param>
            /// <returns>
            /// Success once the last processor has succeeded, Fail when the processor consulted
            /// last reports Fail, otherwise Valid.
            /// </returns>
            public TokenizerState ProcessChar(char c, string fullExpression, int currentIndex)
            {
                TokenizerState outcome = _tokenProcessors[_current].ProcessChar(c, fullExpression, currentIndex);

                if (outcome == TokenizerState.Success)
                {
                    // The active processor is finished; step to the next one.
                    _current += 1;

                    if (_tokenProcessors.Length == _current)
                    {
                        return TokenizerState.Success;
                    }

                    _startIndexes[_current] = currentIndex - _firstIndex;
                    _tokenProcessors[_current].ResetState();

                    // NOTE(review): if this second call also reports Success, the method falls
                    // through and returns Valid without advancing again.
                    outcome = _tokenProcessors[_current].ProcessChar(c, fullExpression, currentIndex);
                }

                if (outcome != TokenizerState.Fail)
                {
                    // Remember where the first processor started matching.
                    if (_firstIndex < 0 && _current == 0)
                    {
                        _firstIndex = currentIndex;
                    }

                    return TokenizerState.Valid;
                }

                return TokenizerState.Fail;
            }
Example #4
0
        /// <summary>
        /// Prepares this iterator for the next sibling tag so the same instance can be reused.
        /// Tokenizer state and insertion mode are set to their defaults, HtmlStart is set to the
        /// current Pos, and Element is cleared. Parent is left as it is.
        /// </summary>

        public void Reset()
        {
            this.TokenizerState = TokenizerState.Default;
            this.HtmlStart      = this.Pos;
            this.InsertionMode  = InsertionMode.Default;
            this.Element        = null;
        }
Example #5
0
        /// <summary>
        /// Reports whether the tokenizer may stop in <paramref name="state"/> when the input ends.
        /// </summary>
        /// <param name="token">Not read or modified by this implementation.</param>
        /// <param name="state">The state the tokenizer was in when the input ended.</param>
        /// <returns>
        /// true for Indentation, Identifier, Number and NewLineCR; false for Begin and String.
        /// </returns>
        /// <exception cref="ArgumentException">The state is none of the above.</exception>
        private bool TryFillTokenIfValidAtInputEnd(Token token, TokenizerState state)
        {
            switch (state)
            {
            // States in which running out of input is acceptable.
            case TokenizerState.Indentation:
            case TokenizerState.Identifier:
            case TokenizerState.Number:
            case TokenizerState.NewLineCR:
                return true;

            // States in which it is not.
            case TokenizerState.Begin:
            case TokenizerState.String:
                return false;

            default:
                throw new ArgumentException($"Unexpected state for Tokenizer: '{state}'");
            }
        }
Example #6
0
 /// <summary>
 /// Builds the partial exception describing a tokenizer that reached an invalid state. The
 /// message names the position, the character at that position and the tokenizer state.
 /// </summary>
 /// <param name="c">The character that was being processed.</param>
 /// <param name="index">The position of that character.</param>
 /// <param name="state">The state the tokenizer was in.</param>
 /// <returns>A TokenException carrying the message, passed through MakePartial.</returns>
 public static PartialExceptionWithContext<TokenException> IllegalStateAt(char c, int index, TokenizerState state)
 {
     return MakePartial(
         new TokenException(
             String.Format(
                 "Tokenizer got invalid state at position {1}({0}). Tokenizer was in state {2}",
                 c,
                 index,
                 state)));
 }
Example #7
0
 /// <summary>
 /// Finishes the token being built: assigns its final type, adds it to the token list, then
 /// starts a fresh empty token and returns the tokenizer to its default state.
 /// </summary>
 /// <param name="tokenType">The type to assign to the finished token.</param>
 private void EndToken(TokenType tokenType)
 {
     // Stamp the final type on the token in progress and publish it.
     _currentToken.TokenType = tokenType;
     _tokens.Add(_currentToken);

     // Begin again with an empty token of unknown type.
     _currentToken          = new Token(TokenType.Unknown, string.Empty);
     _currentTokenizerState = TokenizerState.Default;
 }
Example #8
0
        /// <summary>
        /// Creates a tokenizer over <paramref name="reader"/>, positioned at line 1, character 1,
        /// in the NotStarted state.
        /// </summary>
        /// <param name="reader">The source of characters; must not be null.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="reader"/> is null.
        /// </exception>
        public XamlTokenizer(TextReader reader)
        {
            // Fail at construction rather than with a NullReferenceException on the first read.
            if (reader == null)
            {
                throw new System.ArgumentNullException("reader");
            }

            this.reader = reader;

            lineNumber = 1;
            charPosition = 1;
            state = TokenizerState.NotStarted;
        }
Example #9
0
 /// <summary>
 /// Routes the contents of the string buffer according to <paramref name="returnState"/>:
 /// when none of the bits in DATA_AND_RCDATA_MASK are set in the state the buffer is appended
 /// to the long string buffer, otherwise it is emitted.
 /// </summary>
 /// <param name="returnState">The state the tokenizer will return to.</param>
 void EmitOrAppendStrBuf(TokenizerState returnState)
 {
     // NOTE(review): the source carried a commented-out variant of this test with the opposite
     // polarity ("!= 0" selecting the append branch); confirm the mask value matches this form.
     bool maskedBitsSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;

     if (maskedBitsSet)
     {
         EmitStrBuf();
         return;
     }

     AppendStrBufToLongStrBuf();
 }
        /// <summary>
        /// Builds an action that removes the top entry of a state stack and then pushes
        /// <paramref name="state"/> onto it <paramref name="pushCount"/> times.
        /// </summary>
        /// <param name="state">The state to push.</param>
        /// <param name="pushCount">How many copies to push; must be greater than zero.</param>
        /// <returns>The action to run against a state stack.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="pushCount"/> is zero or negative.
        /// </exception>
        internal static Action<Stack<TokenizerState>> ReplaceState(TokenizerState state, short pushCount)
        {
            if (pushCount <= 0)
            {
                throw new ArgumentOutOfRangeException("pushCount", pushCount, "Must be a positive number.");
            }

            return states =>
            {
                states.Pop();

                int remaining = pushCount;
                while (remaining > 0)
                {
                    states.Push(state);
                    remaining--;
                }
            };
        }
Example #11
0
 /// <summary>
 /// Sets the tokenizer state from the two predicates, checked in order: Literal() selects the
 /// Literal state, otherwise Seperator() selects the Seperator state, otherwise Normal.
 /// </summary>
 private void DetermineState()
 {
     // Seperator() is only consulted when Literal() is false.
     _state = Literal()
         ? TokenizerState.Literal
         : (Seperator() ? TokenizerState.Seperator : TokenizerState.Normal);
 }
Example #12
0
        /// <summary>
        /// Passes one character to the wrapped token processor. Once the processor has returned
        /// anything other than Valid, every later call returns Fail without consulting it again.
        /// </summary>
        /// <param name="c">The character to process.</param>
        /// <param name="fullExpression">The whole expression; passed through to the processor.</param>
        /// <param name="currentIndex">Index of <paramref name="c"/>; passed through to the processor.</param>
        /// <returns>The processor's result, or Fail when this instance is no longer valid.</returns>
        public TokenizerState Feed(char c, string fullExpression, int currentIndex)
        {
            if (_stillValid)
            {
                TokenizerState result = _tokenProcessor.ProcessChar(c, fullExpression, currentIndex);

                // Any result other than Valid ends this processor's participation.
                if (result != TokenizerState.Valid)
                {
                    _stillValid = false;
                }

                return result;
            }

            return TokenizerState.Fail;
        }
Example #13
0
        /// <summary>
        /// Closes out this element. Any pending literal text is flushed first via TryGetLiteral;
        /// then, when there is a parent, the parent is reset to the current position and this
        /// iterator is marked Finished.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory to operate against.
        /// </param>
        ///
        /// <returns>
        /// A lazily evaluated sequence of the objects that are ready to be yielded: the pending
        /// literal when TryGetLiteral returns true, and the parent's element when the parent
        /// itself has no parent. The sequence may be empty.
        /// </returns>

        public IEnumerable <IDomObject> CloseElement(HtmlElementFactory factory)
        {
            IDomObject pendingLiteral = null;
            bool literalIsForCaller = TryGetLiteral(factory, out pendingLiteral);

            if (literalIsForCaller)
            {
                yield return pendingLiteral;
            }

            if (Parent == null)
            {
                yield break;
            }

            // Only a parent with no parent of its own is yielded.
            if (Parent.Parent == null)
            {
                yield return Parent.Element;
            }

            Parent.Reset(Pos);
            TokenizerState = TokenizerState.Finished;
        }
        /// <summary>
        /// Creates the exception that describes a tokenizer left in an invalid state.
        /// </summary>
        /// <param name="tokenizerState">State of the tokenizer.</param>
        /// <param name="batchSource">The batch source, passed on to the exception.</param>
        /// <returns>
        /// An <see cref="UnclosedStringLiteralException"/> for either quote-string state, an
        /// <see cref="UnclosedBlockCommentException"/> for the block-comment state, and a plain
        /// <see cref="ParserException"/> for any other state.
        /// </returns>
        internal static ParserException CreateInvalidTokenizerStateException(
            TokenizerState tokenizerState,
            IBatchSource batchSource)
        {
            bool insideString = tokenizerState == TokenizerState.SingleQuoteString
                                || tokenizerState == TokenizerState.DoubleQuoteString;

            if (insideString)
            {
                return new UnclosedStringLiteralException(tokenizerState, batchSource);
            }

            if (tokenizerState == TokenizerState.BlockComment)
            {
                return new UnclosedBlockCommentException(tokenizerState, batchSource);
            }

            return new ParserException($"Unexpected state {tokenizerState}", batchSource);
        }
        /// <summary>
        /// Tries to parse a connection string into a new instance of <typeparamref name="T"/>.
        /// The string is split on ';' into groups and each group on '=' into a name and a value;
        /// each recognised name has its token evaluated against the parse state.
        /// </summary>
        /// <typeparam name="T">The type to populate.</typeparam>
        /// <param name="connectionString">The connection string.</param>
        /// <param name="args">The tokenizer arguments (case sensitivity, unknown-item handling).</param>
        /// <param name="to">
        /// [out] The populated instance on success; null when the method returns false.
        /// </param>
        /// <returns>
        /// true when every group was either evaluated or skipped; false as soon as a group does
        /// not split into exactly two parts with a known name and
        /// <c>args.SkipUnknownNamedItems</c> is not set.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="connectionString"/> or <paramref name="args"/> is null.
        /// </exception>
        public static bool TryParseConnectionString <T>(string connectionString, TokenizerArgs args, out T to)
            where T : class, new()
        {
            if (connectionString == null)
            {
                throw new ArgumentNullException("connectionString");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            to = null;
            using (TokenizerState <T> state = NewState <T>(args))
            {
                IList <string> pairs = GetWords(connectionString, new string[] { "\"\"", "\'\'" }, '\0', EscapeMode.DoubleItem, ";".ToCharArray());

                foreach (string pair in pairs)
                {
                    IList <string> nameAndValue = GetWords(pair, new string[] { "\"\"", "\'\'" }, '\0', EscapeMode.DoubleItem, "=".ToCharArray());

                    TokenItem item;
                    if ((nameAndValue.Count == 2) && state.Definition.TryGetToken(nameAndValue[0], args.CaseSensitive, out item))
                    {
                        item.Evaluate(nameAndValue[1], state);
                        continue;
                    }

                    // Malformed or unknown group: tolerated only when the caller asked for that.
                    if (!args.SkipUnknownNamedItems)
                    {
                        return false;
                    }
                }
                // TODO: Parse connectionstring using definition

                to = state.Instance;
                return true;
            }
        }
        /// <summary>
        /// Produces the message describing the tokenizer state reached at the end of the file.
        /// </summary>
        /// <param name="state">The state.</param>
        /// <returns>
        /// A fixed message for the block-comment and the two quote-string states; for any other
        /// state, a message that includes the state itself.
        /// </returns>
        private static string FormatTokenizerStateError(TokenizerState state)
        {
            if (state == TokenizerState.BlockComment)
            {
                return "Unclosed block comment at end of file";
            }

            if (state == TokenizerState.DoubleQuoteString)
            {
                return "Unclosed double-quote string at end of file";
            }

            if (state == TokenizerState.SingleQuoteString)
            {
                return "Unclosed single-quote string at end of file";
            }

            return $"Unexpected state at end of file (should not be an error): {state}";
        }
Example #17
0
            /// <summary>
            /// Offers the character to every processor in the set and combines their answers.
            /// All processors are consulted on every call; there is no early exit.
            /// </summary>
            /// <param name="c">The character being processed.</param>
            /// <param name="fullExpression">The whole expression; passed through to the processors.</param>
            /// <param name="currentIndex">Index of <paramref name="c"/>; passed through to the processors.</param>
            /// <returns>
            /// Success when at least one processor reports Success; otherwise Valid when at least
            /// one reports Valid; otherwise Fail.
            /// </returns>
            public TokenizerState ProcessChar(char c, string fullExpression, int currentIndex)
            {
                bool anySucceeded = false;
                bool anyStillValid = false;

                foreach (ITokenProcessor candidate in _tokenProcessors)
                {
                    TokenizerState answer = candidate.ProcessChar(c, fullExpression, currentIndex);

                    if (answer == TokenizerState.Success)
                    {
                        anySucceeded = true;
                    }
                    else if (answer == TokenizerState.Valid)
                    {
                        anyStillValid = true;
                    }
                }

                if (anySucceeded)
                {
                    return TokenizerState.Success;
                }

                return anyStillValid ? TokenizerState.Valid : TokenizerState.Fail;
            }
        /// <summary>
        /// Tries to populate a new instance of <typeparamref name="T"/> from a name/value
        /// collection. Each key is looked up in the token definition and, when known, evaluated
        /// with the value at the same index.
        /// </summary>
        /// <typeparam name="T">The type to populate.</typeparam>
        /// <param name="collection">The collection.</param>
        /// <param name="args">The tokenizer arguments (case sensitivity, unknown-item handling).</param>
        /// <param name="to">
        /// [out] The populated instance on success; null when the method returns false.
        /// </param>
        /// <returns>
        /// true when every entry was evaluated or skipped; false as soon as an unknown key is
        /// met and <c>args.SkipUnknownNamedItems</c> is not set.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="collection"/> or <paramref name="args"/> is null.
        /// </exception>
        public static bool TryParseNameValueCollection <T>(NameValueCollection collection, TokenizerArgs args, out T to)
            where T : class, new()
        {
            if (collection == null)
            {
                throw new ArgumentNullException("collection");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            to = null;
            using (TokenizerState <T> state = NewState <T>(args))
            {
                for (int index = 0; index < collection.Count; index++)
                {
                    TokenItem item;

                    if (state.Definition.TryGetToken(collection.Keys[index], args.CaseSensitive, out item))
                    {
                        item.Evaluate(collection[index], state);
                    }
                    else if (!args.SkipUnknownNamedItems)
                    {
                        // Unknown key and the caller did not ask to skip those.
                        return false;
                    }
                }

                to = state.Instance;
                return true;
            }
        }
        /// <summary>
        /// Tries to populate a new instance of <typeparamref name="T"/> from a string dictionary.
        /// Each key is looked up in the token definition and, when known, evaluated with its
        /// value.
        /// </summary>
        /// <typeparam name="T">The type to populate.</typeparam>
        /// <param name="collection">The dictionary of names and values.</param>
        /// <param name="args">The tokenizer arguments (case sensitivity, unknown-item handling).</param>
        /// <param name="to">
        /// [out] The populated instance on success; null when the method returns false.
        /// </param>
        /// <returns>
        /// true when every entry was evaluated or skipped; false as soon as an unknown key is
        /// met and <c>args.SkipUnknownNamedItems</c> is not set.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="collection"/> or <paramref name="args"/> is null.
        /// </exception>
        public static bool TryParseNameValueCollection <T>(IDictionary <string, string> collection, TokenizerArgs args, out T to)
            where T : class, new()
        {
            if (collection == null)
            {
                throw new ArgumentNullException("collection");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            to = null;
            using (TokenizerState <T> state = NewState <T>(args))
            {
                foreach (KeyValuePair <string, string> entry in collection)
                {
                    TokenItem item;

                    if (state.Definition.TryGetToken(entry.Key, args.CaseSensitive, out item))
                    {
                        item.Evaluate(entry.Value, state);
                    }
                    else if (!args.SkipUnknownNamedItems)
                    {
                        // Unknown key and the caller did not ask to skip those.
                        return false;
                    }
                }

                to = state.Instance;
                return true;
            }
        }
Example #20
0
            /// <summary>
            /// Lazily tokenizes <c>value</c>, starting at <c>position</c>, into alternating Key and
            /// Value tokens. '=' ends a key, '&amp;' ends a pair, and '/' ends tokenizing. A key that
            /// is not followed by '=' is reported with an empty Value token after it.
            /// </summary>
            /// <returns>
            /// The tokens, produced on demand. Nothing is produced when <c>position</c> is already at
            /// or past the end of <c>value</c>.
            /// </returns>
            /// <exception cref="FormatException">
            /// Thrown after the loop when the state machine ended in the Error state: an '=' or '&amp;'
            /// where a key should start, the string ending where a key should start, or an '='
            /// inside a value.
            /// </exception>
            /// <exception cref="NotSupportedException">The current state is not one handled here.</exception>
            public IEnumerable <Token> GetTokens()
            {
                if (position >= value.Length)
                {
                    yield break;
                }
                // readCount: number of characters counted for the token in progress; it is reset
                // to 0 each time a token has been yielded.
                int    readCount     = 0;
                bool   readCompleted = false;
                string errorMessage  = null;

                // NOTE: position is advanced once at the bottom of every iteration, including the
                // iterations that only change state and the final Finish/Error iteration.
                while (!readCompleted)
                {
                    switch (currentState)
                    {
                    case TokenizerState.ReadyToReadKey:
                    {
                        // A key is expected here, so running out of input is an error.
                        if (position >= value.Length)
                        {
                            errorMessage = "Unexpected string end in '{0}' state.".FormatInvariant(currentState);
                            currentState = TokenizerState.Error;
                            break;
                        }
                        char currentChar = value[position];
                        switch (currentChar)
                        {
                        case '=':
                        case '&':
                            // A separator cannot start a key.
                            errorMessage = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(currentChar, currentState);
                            currentState = TokenizerState.Error;
                            break;

                        case '/':
                            currentState = TokenizerState.Finish;
                            break;

                        default:
                            // First character of a key.
                            readCount++;
                            currentState = TokenizerState.ReadKey;
                            break;
                        }
                        break;
                    }

                    case TokenizerState.ReadKey:
                    {
                        // End of input inside a key: report the key with an empty value.
                        // presumably CreateToken uses position and the given count to locate the
                        // token text — verify against its definition.
                        if (position >= value.Length)
                        {
                            yield return(CreateToken(TokenType.Key, readCount));

                            yield return(CreateToken(TokenType.Value, 0));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;
                        }
                        char currentChar = value[position];
                        switch (currentChar)
                        {
                        case '=':
                            // Key complete; a value follows.
                            yield return(CreateToken(TokenType.Key, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.ReadValue;
                            break;

                        case '&':
                            // Key without '=': report it with an empty value, then expect a new key.
                            yield return(CreateToken(TokenType.Key, readCount));

                            yield return(CreateToken(TokenType.Value, 0));

                            readCount    = 0;
                            currentState = TokenizerState.ReadyToReadKey;
                            break;

                        case '/':
                            // Key without '=' followed by the terminator.
                            yield return(CreateToken(TokenType.Key, readCount));

                            yield return(CreateToken(TokenType.Value, 0));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;

                        default:
                            readCount++;
                            break;
                        }
                        break;
                    }

                    case TokenizerState.ReadValue:
                    {
                        // End of input inside a value: the value is complete.
                        if (position >= value.Length)
                        {
                            yield return(CreateToken(TokenType.Value, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;
                        }
                        char currentChar = value[position];
                        switch (currentChar)
                        {
                        case '=':
                            // A second '=' inside a value is rejected.
                            errorMessage = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(currentChar, currentState);
                            currentState = TokenizerState.Error;
                            break;

                        case '&':
                            yield return(CreateToken(TokenType.Value, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.ReadyToReadKey;
                            break;

                        case '/':
                            yield return(CreateToken(TokenType.Value, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;

                        default:
                            readCount++;
                            break;
                        }
                        break;
                    }

                    case TokenizerState.Finish:
                    case TokenizerState.Error:
                        // Terminal states: leave the loop; Error is turned into an exception below.
                        readCompleted = true;
                        break;

                    default:
                        throw new NotSupportedException();
                    }
                    position++;
                }

                if (currentState == TokenizerState.Error)
                {
                    throw new FormatException(errorMessage);
                }
            }
Example #21
0
        /// <summary>
        /// Reads characters from the input until a token can be produced.
        /// </summary>
        /// <returns>
        /// An EndOfStream token once the input is exhausted, or a Terminal token for one of the
        /// characters '=', '&lt;' or '&gt;'. Characters that are neither whitespace nor one of
        /// those terminals are skipped.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The tokenizer is in a state this method has no handling for (for example after
        /// whitespace or a terminal has been read). Previously such a state made the loop spin
        /// forever, because no case matched and nothing changed the state.
        /// </exception>
        public XamlToken ReadNextToken()
        {
            while (true)
            {
                switch (state)
                {
                    case TokenizerState.EndOfStream:
                    {
                        return new XamlToken(XamlTokenType.EndOfStream, lineNumber, charPosition);
                    }

                    case TokenizerState.NotStarted:
                    {
                        var current = ReadNextChar();

                        if (-1 == current)
                        {
                            state = TokenizerState.EndOfStream;
                            continue;
                        }

                        if (Char.IsWhiteSpace((char) current))
                        {
                            state = TokenizerState.HeadingWhitespaces;
                            continue;
                        }

                        switch (current)
                        {
                            case '=':
                            case '<':
                            case '>':
                            {
                                state = TokenizerState.Terminal;
                                return new XamlToken(XamlTokenType.Terminal, ((char) current).ToString(), lineNumber, charPosition);
                            }
                        }

                        // Any other character: stay in NotStarted and read the next one.
                        break;
                    }

                    default:
                    {
                        // No transition is defined for this state yet; fail fast instead of
                        // looping without ever reading input or changing state.
                        throw new System.InvalidOperationException(
                            "ReadNextToken has no handling for tokenizer state '" + state + "'.");
                    }
                }
            }
        }
Example #22
0
		/// <summary>
		/// Copies the tokenizer state of <paramref name="other"/> into this instance. The two
		/// character buffers are copied by value (growing the local arrays when they are too
		/// small); the tag name, attribute name and attributes are cloned; everything else is
		/// assigned directly. shouldSuspend is always cleared rather than copied.
		/// </summary>
		/// <param name="other">The tokenizer to copy state from.</param>
		public void LoadState(Tokenizer other)
		{
			// Short string buffer: grow if needed, then copy the used part (2 bytes per char).
			strBufLen = other.strBufLen;
			if (strBuf.Length < strBufLen)
			{
				strBuf = new char[strBufLen];
			}
			Buffer.BlockCopy(other.strBuf, 0, strBuf, 0, strBufLen << 1);

			// Long string buffer: same treatment.
			longStrBufLen = other.longStrBufLen;
			if (longStrBuf.Length < longStrBufLen)
			{
				longStrBuf = new char[longStrBufLen];
			}
			Buffer.BlockCopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen << 1);

			// Plain state: straight assignment.
			stateSave = other.stateSave;
			returnStateSave = other.returnStateSave;
			endTagExpectation = other.endTagExpectation;
			endTagExpectationAsArray = other.endTagExpectationAsArray;
			// line = 1; XXX line numbers
			lastCR = other.lastCR;
			index = other.index;
			forceQuirks = other.forceQuirks;
			additional = other.additional;
			entCol = other.entCol;
			firstCharKey = other.firstCharKey;
			lo = other.lo;
			hi = other.hi;
			candidate = other.candidate;
			strBufMark = other.strBufMark;
			prevValue = other.prevValue;
			value = other.value;
			seenDigits = other.seenDigits;
			endTag = other.endTag;
			shouldSuspend = false;

			// Doctype parts are assigned as they are, null included.
			doctypeName = other.doctypeName;
			systemIdentifier = other.systemIdentifier;
			publicIdentifier = other.publicIdentifier;

			// These are cloned when present so the two tokenizers do not share them.
			tagName = other.tagName == null ? null : other.tagName.CloneElementName();
			attributeName = other.attributeName == null ? null : other.attributeName.CloneAttributeName();
			attributes = other.attributes == null ? null : other.attributes.CloneAttributes();
		}
Example #23
0
        /// <summary>
        /// Lazily splits a character stream into base tokens: a minus token for each '-' outside
        /// any delimiter, a text token for the content of each double-quoted section (a backslash
        /// makes the next character literal), and content tokens for (...) and [...] sections.
        /// Any other character outside a delimited section is ignored.
        /// </summary>
        /// <param name="charEnumer">The characters to tokenize.</param>
        /// <returns>An enumerator over the tokens, produced on demand.</returns>
        /// <exception cref="UnexpectedEndOfInputError">
        /// The input ended inside a quoted, round-bracket or square-bracket section, or right
        /// after a backslash inside quotes.
        /// </exception>
        public IEnumerator <BaseToken> GetBaseTokens(IEnumerator <char> charEnumer)
        {
            TokenizerState mode = TokenizerState.ReadNothing;

            // Text collected so far for the delimited section being read.
            string pending = "";

            while (charEnumer.MoveNext())
            {
                char ch = charEnumer.Current;

                if (mode == TokenizerState.ReadNothing)
                {
                    switch (ch)
                    {
                    case '-':
                        yield return new BaseToken(BaseTokenType.minus, "-");

                        break;

                    case '"':
                        mode = TokenizerState.ReadQuote;
                        break;

                    case '(':
                        mode = TokenizerState.ReadRound;
                        break;

                    case '[':
                        mode = TokenizerState.ReadSquare;
                        break;

                    default:
                        // Whitespace and every other character are skipped.
                        break;
                    }
                }
                else if (mode == TokenizerState.ReadQuote)
                {
                    if (ch == '\\')
                    {
                        mode = TokenizerState.ReadSlashInText;
                    }
                    else if (ch == '"')
                    {
                        yield return new BaseToken(BaseTokenType.text, pending);

                        pending = "";
                        mode    = TokenizerState.ReadNothing;
                    }
                    else
                    {
                        pending += ch;
                    }
                }
                else if (mode == TokenizerState.ReadSlashInText)
                {
                    // The character after a backslash is taken literally.
                    pending += ch;
                    mode     = TokenizerState.ReadQuote;
                }
                else if (mode == TokenizerState.ReadRound)
                {
                    if (ch == ')')
                    {
                        yield return new BaseToken(BaseTokenType.rndBrktContent, pending);

                        pending = "";
                        mode    = TokenizerState.ReadNothing;
                    }
                    else
                    {
                        pending += ch;
                    }
                }
                else if (mode == TokenizerState.ReadSquare)
                {
                    if (ch == ']')
                    {
                        yield return new BaseToken(BaseTokenType.sqrBrktContent, pending);

                        pending = "";
                        mode    = TokenizerState.ReadNothing;
                    }
                    else
                    {
                        pending += ch;
                    }
                }
            }

            // Input exhausted: ending anywhere but between tokens is an error.
            if (mode == TokenizerState.ReadQuote)
            {
                throw new UnexpectedEndOfInputError("Met end of input while reading text, expected (\")");
            }

            if (mode == TokenizerState.ReadSlashInText)
            {
                throw new UnexpectedEndOfInputError("Met end of input after reading \"\\\", expected symbol");
            }

            if (mode == TokenizerState.ReadRound)
            {
                throw new UnexpectedEndOfInputError("Met end of input while reading round round content, expected \")\"");
            }

            if (mode == TokenizerState.ReadSquare)
            {
                throw new UnexpectedEndOfInputError("Met end of input while reading round square content, expected \"]\"");
            }
        }
Example #24
0
		/// <summary>
		/// Turns the numeric character reference held in <c>value</c> into one or two UTF-16
		/// code units and passes them to EmitOrAppendOne / EmitOrAppendTwo, reporting the
		/// matching error or warning for the special ranges first.
		/// </summary>
		/// <param name="returnState">
		/// The state to return to; handed on unchanged to the emit-or-append helpers.
		/// </param>
		private void HandleNcrValue(TokenizerState returnState)
		{
			/*
			 * If one or more characters match the range, then take them all and
			 * interpret the string of characters as a number (either hexadecimal or
			 * decimal as appropriate).
			 */
			// Values up to 0xFFFF fit in a single char.
			if (value <= 0xFFFF)
			{
				if (value >= 0x80 && value <= 0x9f)
				{
					/*
					 * If that number is one of the numbers in the first column of
					 * the following table, then this is a parse error.
					 */
					ErrNcrInC1Range();
					/*
					 * Find the row with that number in the first column, and return
					 * a character token for the Unicode character given in the
					 * second column of that row.
					 */
					// The replacement is looked up in the WINDOWS_1252 table, indexed from 0x80.
					char[] val = NamedCharacters.WINDOWS_1252[value - 0x80];
					EmitOrAppendOne(val, returnState);
					// [NOCPP[
				}
				else if (value == 0xC
					  && contentSpacePolicy != XmlViolationPolicy.Allow)
				{
					// Form feed under a non-Allow policy: replaced by SPACE or reported as fatal.
					// For any other policy value nothing is emitted in this branch.
					if (contentSpacePolicy == XmlViolationPolicy.AlterInfoset)
					{
						EmitOrAppendOne(SPACE, returnState);
					}
					else if (contentSpacePolicy == XmlViolationPolicy.Fatal)
					{
						Fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
					}
					// ]NOCPP]
				}
				else if (value == 0x0)
				{
					// Zero is replaced by REPLACEMENT_CHARACTER.
					ErrNcrZero();
					EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
				}
				else if ((value & 0xF800) == 0xD800)
				{
					// 0xD800-0xDFFF (surrogate range) is replaced by REPLACEMENT_CHARACTER.
					ErrNcrSurrogate();
					EmitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
				}
				else
				{
					/*
					 * Otherwise, return a character token for the Unicode character
					 * whose code point is that number.
					 */
					char ch = (char)value;
					// [NOCPP[
					// The checks below report problems; two of them may also substitute the
					// character through the value returned by the error helper.
					if (value == 0x0D)
					{
						ErrNcrCr();
					}
					else if ((value <= 0x0008) || (value == 0x000B)
						  || (value >= 0x000E && value <= 0x001F))
					{
						ch = ErrNcrControlChar(ch);
					}
					else if (value >= 0xFDD0 && value <= 0xFDEF)
					{
						ErrNcrUnassigned();
					}
					else if ((value & 0xFFFE) == 0xFFFE)
					{
						ch = ErrNcrNonCharacter(ch);
					}
					else if (value >= 0x007F && value <= 0x009F)
					{
						// Only 0x7F can reach this branch: 0x80-0x9F was handled at the top.
						ErrNcrControlChar();
					}
					else
					{
						MaybeWarnPrivateUse(ch);
					}
					// ]NOCPP]
					// bmpChar is a reusable one-element buffer for the emit call.
					bmpChar[0] = ch;
					EmitOrAppendOne(bmpChar, returnState);
				}
			}
			else if (value <= 0x10FFFF)
			{
				// Above 0xFFFF: encoded as a pair of chars (lead + trail) in astralChar.
				// [NOCPP[
				MaybeWarnPrivateUseAstral();
				if ((value & 0xFFFE) == 0xFFFE)
				{
					ErrAstralNonCharacter(value);
				}
				// ]NOCPP]
				astralChar[0] = (char)(LEAD_OFFSET + (value >> 10));
				astralChar[1] = (char)(0xDC00 + (value & 0x3FF));
				EmitOrAppendTwo(astralChar, returnState);
			}
			else
			{
				// Beyond 0x10FFFF: out of range, replaced by REPLACEMENT_CHARACTER.
				ErrNcrOutOfRange();
				EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
			}
		}
Example #25
0
		/// <summary>
		/// Delivers a single character to one of two sinks, selected by testing
		/// <paramref name="returnState"/> against <c>DATA_AND_RCDATA_MASK</c>.
		/// </summary>
		/// <param name="val">
		/// Buffer whose first element is the character to deliver.
		/// </param>
		/// <param name="returnState">
		/// Tokenizer state whose byte value is masked to pick the sink.
		/// </param>
		private void EmitOrAppendOne(char[] val, TokenizerState returnState)
		{
			// NOTE(review): an earlier, commented-out form of this condition was
			//   (returnState & DATA_AND_RCDATA_MASK) != 0
			// The active test below has the opposite polarity - confirm this is intended.
			bool maskedBitsClear = ((byte)returnState & DATA_AND_RCDATA_MASK) == 0;

			if (!maskedBitsClear)
			{
				// At least one masked bit is set: hand the character to the token handler.
				TokenHandler.Characters(val, 0, 1);
				return;
			}

			// No masked bit is set: accumulate the character in the long string buffer.
			AppendLongStrBuf(val[0]);
		}
        void StateLoop3_ScriptData(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were an LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {
                   
                    // XXX reorder point
                    case TokenizerState.s06_SCRIPT_DATA:
                        /*scriptdataloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data less-than sign state.
                                         */
                                        FlushChars();
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s17_SCRIPT_DATA_LESS_THAN_SIGN;
                                        goto breakScriptdataloop; // FALL THRU continue
                                    // stateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * script data state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataloop:
                            goto case TokenizerState.s17_SCRIPT_DATA_LESS_THAN_SIGN;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s17_SCRIPT_DATA_LESS_THAN_SIGN:
                        /*scriptdatalessthansignloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Set the temporary buffer
                                         * to the empty string. Switch to the script
                                         * data end tag open state.
                                         */
                                        index = 0;
                                        ClearStrBuf();
                                        //state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
                                        state = TokenizerState.NON_DATA_END_TAG_NAME;
                                        goto continueStateloop;
                                    case '!':
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
                                        state = TokenizerState.s20_SCRIPT_DATA_ESCAPE_START;

                                        goto breakScriptdatalessthansignloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    default:
                                        /*
                                         * Otherwise, emit a U+003C LESS-THAN SIGN
                                         * character token
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        /*
                                         * and reconsume the current input character in
                                         * the data state.
                                         */
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatalessthansignloop:
                            goto case TokenizerState.s20_SCRIPT_DATA_ESCAPE_START;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s20_SCRIPT_DATA_ESCAPE_START:
                        /*scriptdataescapestartloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escape start dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
                                        state = TokenizerState.s21_SCRIPT_DATA_ESCAPE_START_DASH;
                                        goto breakScriptdataescapestartloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapestartloop:
                            goto case TokenizerState.s21_SCRIPT_DATA_ESCAPE_START_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s21_SCRIPT_DATA_ESCAPE_START_DASH:
                        /*scriptdataescapestartdashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escaped dash dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
                                        state = TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH;
                                        goto breakScriptdataescapestartdashloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapestartdashloop:
                            goto case TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH:
                        /*scriptdataescapeddashdashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Stay in the
                                         * script data escaped dash dash state.
                                         */
                                        continue;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data escaped less-than sign state.
                                         */
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit a U+003E
                                         * GREATER-THAN SIGN character token. Switch to
                                         * the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakScriptdataescapeddashdashloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakScriptdataescapeddashdashloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapeddashdashloop:
                            goto case TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s22_SCRIPT_DATA_ESCAPED:
                        /*scriptdataescapedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escaped dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
                                        state = TokenizerState.s23_SCRIPT_DATA_ESCAPED_DASH;
                                        goto breakScriptdataescapedloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data escaped less-than sign state.
                                         */
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * script data escaped state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapedloop:
                            goto case TokenizerState.s23_SCRIPT_DATA_ESCAPED_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s23_SCRIPT_DATA_ESCAPED_DASH:
                        /*scriptdataescapeddashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escaped dash dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
                                        state = TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH;
                                        goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data escaped less-than sign state.
                                         */
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                                        goto breakScriptdataescapeddashloop;
                                    // goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapeddashloop:
                            goto case TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
                        /*scriptdataescapedlessthanloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Set the temporary buffer
                                         * to the empty string. Switch to the script
                                         * data escaped end tag open state.
                                         */
                                        index = 0;
                                        ClearStrBuf();
                                        returnState = TokenizerState.s22_SCRIPT_DATA_ESCAPED;

                                        //state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
                                        state = TokenizerState.NON_DATA_END_TAG_NAME;
                                        goto continueStateloop;
                                    case 'S':
                                    case 's':
                                        /*
                                         * U+0041 LATIN CAPITAL LETTER A through to
                                         * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
                                         * LESS-THAN SIGN character token and the
                                         * current input character as a character token.
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        reader.StartCollect();
                                        index = 1;
                                        /*
                                         * Set the temporary buffer to the empty string.
                                         * Append the lowercase version of the current
                                         * input character (add 0x0020 to the
                                         * character's code point) to the temporary
                                         * buffer. Switch to the script data double
                                         * escape start state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
                                        state = TokenizerState.s28_SCRIPT_DATA_DOUBLE_ESCAPE_START;
                                        goto breakScriptdataescapedlessthanloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Emit a U+003C LESS-THAN SIGN
                                         * character token and reconsume the current
                                         * input character in the script data escaped
                                         * state.
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        reader.StartCollect();
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapedlessthanloop:
                            goto case TokenizerState.s28_SCRIPT_DATA_DOUBLE_ESCAPE_START;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s28_SCRIPT_DATA_DOUBLE_ESCAPE_START:
                        /*scriptdatadoubleescapestartloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                Debug.Assert(index > 0);
                                if (index < 6)
                                {
                                    // SCRIPT_ARR.Length
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        //make it lower case 
                                        folded += (char)0x20;
                                    }
                                    if (folded != Tokenizer.SCRIPT_ARR[index])
                                    {
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                switch (c)
                                {
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                    case '/':
                                    case '>':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
                                         * (>) Emit the current input character as a
                                         * character token. If the temporary buffer is
                                         * the string "script", then switch to the
                                         * script data double escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakScriptdatadoubleescapestartloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data escaped state.
                                         */
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapestartloop:
                            goto case TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED:
                        /*scriptdatadoubleescapedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data double escaped dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
                                        state = TokenizerState.s30_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
                                        goto breakScriptdatadoubleescapedloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Emit a U+003C
                                         * LESS-THAN SIGN character token. Switch to the
                                         * script data double escaped less-than sign
                                         * state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * script data double escaped state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapedloop:
                            goto case TokenizerState.s30_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s30_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
                        /*scriptdatadoubleescapeddashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data double escaped dash dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
                                        state = TokenizerState.s31_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
                                        goto breakScriptdatadoubleescapeddashloop;
                                    // goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Emit a U+003C
                                         * LESS-THAN SIGN character token. Switch to the
                                         * script data double escaped less-than sign
                                         * state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data double escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapeddashloop:
                            goto case TokenizerState.s31_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s31_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
                        /*scriptdatadoubleescapeddashdashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Stay in the
                                         * script data double escaped dash dash state.
                                         */
                                        continue;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Emit a U+003C
                                         * LESS-THAN SIGN character token. Switch to the
                                         * script data double escaped less-than sign
                                         * state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                                        goto breakScriptdatadoubleescapeddashdashloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit a U+003E
                                         * GREATER-THAN SIGN character token. Switch to
                                         * the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data double escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapeddashdashloop:
                            goto case TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
                        /*scriptdatadoubleescapedlessthanloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
                                         * character token. Set the temporary buffer to
                                         * the empty string. Switch to the script data
                                         * double escape end state.
                                         */
                                        index = 0;
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
                                        state = TokenizerState.s33_SCRIPT_DATA_DOUBLE_ESCAPE_END;
                                        goto breakScriptdatadoubleescapedlessthanloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data double escaped
                                         * state.
                                         */
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapedlessthanloop:
                            goto case TokenizerState.s33_SCRIPT_DATA_DOUBLE_ESCAPE_END;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s33_SCRIPT_DATA_DOUBLE_ESCAPE_END:
                        /*scriptdatadoubleescapeendloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                if (index < 6)
                                {
                                    // SCRIPT_ARR.Length
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded != Tokenizer.SCRIPT_ARR[index])
                                    {
                                        reader.StepBack();
                                        //reconsume = true;
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                switch (c)
                                {
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                    case '/':
                                    case '>':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
                                         * (>) Emit the current input character as a
                                         * character token. If the temporary buffer is
                                         * the string "script", then switch to the
                                         * script data escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Reconsume the current input character in the
                                         * script data double escaped state.
                                         */
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                        }
                        //------------------------------------
                        //eof
                        goto breakStateloop; 
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

        breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }
Example #27
0
 /// <summary>
 /// Delivers the first character of <paramref name="val"/> to one of two sinks, chosen
 /// by testing <paramref name="returnState"/> (cast to byte) against DATA_AND_RCDATA_MASK.
 /// </summary>
 /// <param name="val">Character array; only element 0 is consumed.</param>
 /// <param name="returnState">Tokenizer state that selects the sink.</param>
 void EmitOrAppendOne(char[] val, TokenizerState returnState)
 {
     bool anyMaskedBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;

     if (anyMaskedBitSet)
     {
         // At least one masked bit is set: hand the character to the token listener.
         TokenListener.Characters(val, 0, 1);
         return;
     }

     // No masked bit is set: the character goes into the long string buffer.
     AppendLongStrBuf(val[0]);
 }
Example #28
0
 /// <summary>
 /// Initializes a new instance of the <see cref="UnclosedStringLiteralException"/> class.
 /// Both arguments are passed through unchanged to the base-class constructor.
 /// </summary>
 /// <param name="tokenizerState">State of the tokenizer; forwarded to the base constructor.</param>
 /// <param name="batchSource">The batch source; forwarded to the base constructor.</param>
 internal UnclosedStringLiteralException(TokenizerState tokenizerState, IBatchSource batchSource)
     : base(tokenizerState, batchSource)
 {
     // Intentionally empty: this constructor does no work of its own.
 }
Example #29
0
 /// <summary>
 /// Records <paramref name="tokenizerState"/> as the current tokenizer state and appends
 /// <paramref name="character"/> to the text of the current token.
 /// </summary>
 /// <param name="tokenizerState">State to store in <c>_currentTokenizerState</c>.</param>
 /// <param name="character">Text to append; defaults to the empty string.</param>
 private void StartToken(TokenizerState tokenizerState, string character = "")
 {
     this._currentTokenizerState = tokenizerState;

     // Spelled-out form of the compound assignment.
     this._currentToken.Text = this._currentToken.Text + character;
 }
Example #30
0
        /// <summary>
        /// Writes the token members of <paramref name="instance"/> to <paramref name="writer"/>
        /// in two passes: first as XML attributes, then as child elements for every member the
        /// attribute pass did not claim.
        /// </summary>
        /// <param name="writer">XML writer that receives the attributes and elements.</param>
        /// <param name="instance">Object whose token members are written.</param>
        /// <param name="args">Arguments used to create the tokenizer state; cloned for nested groups.</param>
        /// <returns>
        /// <c>false</c> as soon as a group's <c>TryWriteXml</c> call reports failure;
        /// <c>true</c> once both passes have completed.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="writer"/>, <paramref name="instance"/> or <paramref name="args"/> is null.
        /// </exception>
        internal static bool TryWrite(XmlWriter writer, T instance, TokenizerArgs args)
        {
            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            if (instance == null)
            {
                throw new ArgumentNullException("instance");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            // Members claimed by the attribute pass; the element pass skips them.
            Hashtable claimedByAttributePass = new Hashtable();

            using (TokenizerState<T> state = Tokenizer.NewState<T>(args, instance))
            {
                // Pass 1: a member is written as an attribute only when it has tokens,
                // has no groups, and yields exactly one value.
                foreach (TokenMember member in state.Definition.AllTokenMembers)
                {
                    object[] memberValues = member.GetValues(state);

                    bool attributeCandidate =
                        member.Tokens.Count > 0 &&
                        member.Groups.Count <= 0 &&
                        memberValues != null &&
                        memberValues.Length == 1;

                    if (!attributeCandidate)
                    {
                        continue; // left for the element pass
                    }

                    // Claimed even if the single value turns out to be null.
                    claimedByAttributePass[member] = member;

                    object onlyValue = memberValues[0];
                    if (onlyValue == null)
                    {
                        continue;
                    }

                    Type runtimeType = onlyValue.GetType();

                    foreach (TokenItem token in member.Tokens)
                    {
                        bool usable =
                            token.Name != null &&
                            (token.ValueType == null || token.ValueType.IsAssignableFrom(runtimeType));

                        if (usable)
                        {
                            // Will throw if written multiple times -> definition bug, resolve there
                            writer.WriteAttributeString(token.Name, token.GetStringValue(onlyValue, state));
                            break; // first usable token wins
                        }
                    }
                }

                // Pass 2: token groups and members with multiple values become child elements.
                foreach (TokenMember member in state.Definition.AllTokenMembers)
                {
                    if (claimedByAttributePass.Contains(member))
                    {
                        continue;
                    }

                    object[] memberValues = member.GetValues(state);
                    if (memberValues == null || memberValues.Length == 0)
                    {
                        continue;
                    }

                    foreach (object item in memberValues)
                    {
                        if (item == null)
                        {
                            continue;
                        }

                        Type runtimeType = item.GetType();
                        bool emittedAsGroup = false;

                        // Prefer the first group whose value type accepts this item.
                        foreach (TokenGroupItem groupItem in member.Groups)
                        {
                            bool accepts =
                                groupItem.ValueType == null || groupItem.ValueType.IsAssignableFrom(runtimeType);

                            if (!accepts)
                            {
                                continue;
                            }

                            writer.WriteStartElement(groupItem.Name);

                            // Will throw if written multiple times -> definition bug, resolve there
                            if (!groupItem.TryWriteXml(writer, args.Clone(state.Instance), item))
                            {
                                return false;
                            }

                            writer.WriteEndElement();
                            emittedAsGroup = true;
                            break;
                        }

                        if (emittedAsGroup)
                        {
                            continue;
                        }

                        // No group took the item: fall back to the first usable plain token.
                        foreach (TokenItem token in member.Tokens)
                        {
                            bool usable =
                                token.Name != null &&
                                (token.ValueType == null || token.ValueType.IsAssignableFrom(runtimeType));

                            if (usable)
                            {
                                // Will throw if written multiple times -> definition bug, resolve there
                                writer.WriteElementString(token.Name, token.GetStringValue(item, state));
                                break;
                            }
                        }
                    }
                }
            }

            return true;
        }
Example #31
0
        /// <summary>
        /// Builds an instance from an XML node: every attribute is evaluated as a token, then
        /// every child element is parsed as a token group or, failing that, evaluated as a token.
        /// </summary>
        /// <param name="element">Node to read; a navigator is created from it.</param>
        /// <param name="args">
        /// Tokenizer arguments. <c>CaseSensitive</c> controls name lookup and
        /// <c>SkipUnknownNamedItems</c> decides whether an unrecognised name is ignored or fails the parse.
        /// </param>
        /// <param name="to">
        /// Receives the populated instance on success; <c>null</c> when the method returns <c>false</c>.
        /// </param>
        /// <returns>
        /// <c>false</c> when an unrecognised attribute or element is met while
        /// <c>SkipUnknownNamedItems</c> is off, or when a group's <c>TryParseXml</c> fails;
        /// otherwise <c>true</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
        internal static bool TryParse(IXPathNavigable element, TokenizerArgs args, out T to)
        {
            // Fail with a descriptive exception rather than a NullReferenceException.
            if (element == null)
            {
                throw new ArgumentNullException("element");
            }

            XPathNavigator nav = element.CreateNavigator();

            to = null;
            using (TokenizerState <T> state = Tokenizer.NewState <T>(args))
            {
                if (nav.MoveToFirstAttribute())
                {
                    do
                    {
                        TokenItem ti;

                        if (!state.Definition.TryGetToken(nav.LocalName, args.CaseSensitive, out ti))
                        {
                            if (args.SkipUnknownNamedItems)
                            {
                                // In a do/while, 'continue' jumps to the loop condition,
                                // so this still advances to the next attribute.
                                continue;
                            }

                            return(false);
                        }

                        ti.Evaluate(nav.Value, state);
                    }while (nav.MoveToNextAttribute());

                    // Return from the attribute axis to the owning element before scanning children.
                    nav.MoveToParent();
                }

                // Start on the first child ELEMENT. The loop below advances with
                // MoveToNext(XPathNodeType.Element), which skips comments, text and processing
                // instructions; MoveToFirstChild() would land on such a node if it came first
                // and it would then be rejected as an unknown named item.
                if (nav.MoveToChild(XPathNodeType.Element))
                {
                    do
                    {
                        string name = nav.LocalName;

                        TokenGroupItem group;
                        TokenItem      ti;
                        if (state.Definition.TryGetGroup(name, args.CaseSensitive, out group))
                        {
                            object value;

                            if (!group.TryParseXml(nav, args.Clone(state.Instance), out value))
                            {
                                return(false);
                            }

                            group.Member.SetValue(state, value);
                        }
                        else if (state.Definition.TryGetToken(name, args.CaseSensitive, out ti))
                        {
                            // Allow tokens as element
                            ti.Evaluate(nav.Value, state);
                        }
                        else if (!args.SkipUnknownNamedItems)
                        {
                            return(false);
                        }
                    }while (nav.MoveToNext(XPathNodeType.Element));
                }

                to = state.Instance;
                return(true);
            }
        }
        /// <summary>
        /// Scans <paramref name="buffer"/> from start to end and collects the tokens found:
        /// the symbols <c>( ) , *</c>, identifiers made of letters, digits and underscores,
        /// and any other non-whitespace character as a single-character token.
        /// </summary>
        /// <param name="buffer">Text to tokenize.</param>
        /// <returns>The token list accumulated by the tokenizer output.</returns>
        public static List <Token> Tokenize(string buffer)
        {
            TokenizerOutput output = new TokenizerOutput();
            TokenizerState  cursor = new TokenizerState(buffer);

            for (;;)
            {
                // Skip whitespace, but stop on '\n' so that line breaks are handled below.
                while (!cursor.IsEndOfStream())
                {
                    char pending = cursor.GetChar();
                    if (pending == '\n' || !char.IsWhiteSpace(pending))
                    {
                        break;
                    }
                    cursor.NextChar();
                }

                if (cursor.IsEndOfStream())
                {
                    break;
                }

                char current = cursor.GetChar();

                if (current == '\n')
                {
                    // Line break: register the new line, then step past the character.
                    cursor.NextLine();
                    cursor.NextChar();
                }
                else if (current == '(')
                {
                    output.AddSymbol(TokenType.BraceBegin, current, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (current == ')')
                {
                    output.AddSymbol(TokenType.BraceEnd, current, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (current == ',')
                {
                    output.AddSymbol(TokenType.ArgumentSeparator, current, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (current == '*')
                {
                    output.AddSymbol(TokenType.Pointer, current, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (char.IsLetter(current) || current == '_')
                {
                    // Identifier: consume the run of letters, digits and underscores.
                    int identStart = cursor.BufferPos;
                    while (!cursor.IsEndOfStream())
                    {
                        char next = cursor.GetChar();
                        if (!char.IsLetterOrDigit(next) && next != '_')
                        {
                            break;
                        }
                        cursor.NextChar();
                    }

                    // NOTE(review): CreateInfo() is called after the identifier has been
                    // consumed, whereas the symbol branches call it before advancing;
                    // confirm that this difference is intended.
                    output.AddIdent(buffer.Substring(identStart, cursor.BufferPos - identStart), cursor.CreateInfo());
                }
                else
                {
                    // Anything else is passed through as a single-character token.
                    output.AddChar(current, cursor.CreateInfo());
                    cursor.NextChar();
                }
            }

            return output.Tokens;
        }
Example #33
0
        // ]NOCPP]

        // For the token handler to call
        /**
         * Stores the given tokenizer state as the saved state and, unless that
         * state is the plain data state, also records the element name expected
         * in the matching end tag. This should only ever be used to put the
         * tokenizer into one of the states that have a special end tag
         * expectation.
         * 
         * @param specialTokenizerState
         *            the tokenizer state to set
         * @param endTagExpectation
         *            the expected end tag for transitioning back to normal;
         *            not read when the state is s01_DATA
         */
        public void SetStateAndEndTagExpectation(TokenizerState specialTokenizerState,
                [Local] String endTagExpectation)
        {
            this.stateSave = specialTokenizerState;

            if (specialTokenizerState != TokenizerState.s01_DATA)
            {
                // Resolve the tag name to an ElementName, then refresh the array form.
                char[] expectedNameChars = endTagExpectation.ToCharArray();
                this.endTagExpectation = ElementName.ElementNameByBuffer(expectedNameChars);
                EndTagExpectationToArray();
            }
        }
Example #34
0
		private int StateLoop(TokenizerState state, char c,
                int pos, char[] buf, bool reconsume, TokenizerState returnState,
				int endPos)
		{
			/*
			 * Idioms used in this code:
			 * 
			 * 
			 * Consuming the next input character
			 * 
			 * To consume the next input character, the code does this: if (++pos ==
			 * endPos) { goto breakStateloop; } c = buf[pos];
			 * 
			 * 
			 * Staying in a state
			 * 
			 * When there's a state that the tokenizer may stay in over multiple
			 * input characters, the state has a wrapper |for(;;)| loop and staying
			 * in the state continues the loop.
			 * 
			 * 
			 * Switching to another state
			 * 
			 * To switch to another state, the code sets the state variable to the
			 * magic number of the new state. Then it either continues stateloop or
			 * breaks out of the state's own wrapper loop if the target state is
			 * right after the current state in source order. (This is a partial
			 * workaround for Java's lack of goto.)
			 * 
			 * 
			 * Reconsume support
			 * 
			 * The spec sometimes says that an input character is reconsumed in
			 * another state. If a state can ever be entered so that an input
			 * character can be reconsumed in it, the state's code starts with an
			 * |if (reconsume)| that sets reconsume to false and skips over the
			 * normal code for consuming a new character.
			 * 
			 * To reconsume the current character in another state, the code sets
			 * |reconsume| to true and then switches to the other state.
			 * 
			 * 
			 * Emitting character tokens
			 * 
			 * This method emits character tokens lazily. Whenever a new range of
			 * character tokens starts, the field cstart must be set to the start
			 * index of the range. The flushChars() method must be called at the end
			 * of a range to flush it.
			 * 
			 * 
			 * U+0000 handling
			 * 
			 * The various states have to handle the replacement of U+0000 with
			 * U+FFFD. However, if U+0000 would be reconsumed in another state, the
			 * replacement doesn't need to happen, because it's handled by the
			 * reconsuming state.
			 * 
			 * 
			 * LF handling
			 * 
			 * Every state needs to increment the line number upon LF unless the LF
			 * gets reconsumed by another state which increments the line number.
			 * 
			 * 
			 * CR handling
			 * 
			 * Every state needs to handle CR unless the CR gets reconsumed and is
			 * handled by the reconsuming state. The CR needs to be handled as if it
			 * were an LF, the lastCR field must be set to true and then this
			 * method must return. The IO driver will then swallow the next
			 * character if it is an LF to coalesce CRLF.
			 */

			/*
			 * As there is no support for labeled loops in C#, instead of break <loop>;
			 * the port uses goto break<loop>; and a label after the loop.
			 * Instead of continue <loop>; it uses goto continue<loop>; and a label
			 * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
			 */

			/*stateloop:*/
			for (; ; )
			{
			continueStateloop:

				switch (state)
				{
					case TokenizerState.DATA:
						/*dataloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in data state.
									 */
									FlushChars(buf, pos);
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('\u0000');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

									goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the tag
									 * open state.
									 */
									FlushChars(buf, pos);

									//state = Transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
							        state = TokenizerState.TAG_OPEN;
									goto breakDataloop; // FALL THROUGH continue
								// stateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the input character as a
									 * character token.
									 * 
									 * Stay in the data state.
									 */
									continue;
							}
						}
					breakDataloop:
						goto case TokenizerState.TAG_OPEN;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.TAG_OPEN:
						/*tagopenloop:*/
						for (; ; )
						{
							/*
							 * The behavior of this state depends on the content
							 * model flag.
							 */
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * If the content model flag is set to the PCDATA state
							 * Consume the next input character:
							 */
							if (c >= 'A' && c <= 'Z')
							{
								/*
								 * U+0041 LATIN CAPITAL LETTER A through to U+005A
								 * LATIN CAPITAL LETTER Z Create a new start tag
								 * token,
								 */
								endTag = false;
								/*
								 * set its tag name to the lowercase version of the
								 * input character (add 0x0020 to the character's
								 * code point),
								 */
								ClearStrBufAndAppend((char)(c + 0x20));
								/* then switch to the tag name state. */
								//state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                state = TokenizerState.TAG_NAME;
								/*
								 * (Don't emit the token yet; further details will
								 * be filled in before it is emitted.)
								 */
								goto breakTagopenloop;
								// goto continueStateloop;
							}
							else if (c >= 'a' && c <= 'z')
							{
								/*
								 * U+0061 LATIN SMALL LETTER A through to U+007A
								 * LATIN SMALL LETTER Z Create a new start tag
								 * token,
								 */
								endTag = false;
								/*
								 * set its tag name to the input character,
								 */
								ClearStrBufAndAppend(c);
								/* then switch to the tag name state. */
								//state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                state = TokenizerState.TAG_NAME;
								/*
								 * (Don't emit the token yet; further details will
								 * be filled in before it is emitted.)
								 */
								goto breakTagopenloop;
								// goto continueStateloop;
							}
							switch (c)
							{
								case '!':
									/*
									 * U+0021 EXCLAMATION MARK (!) Switch to the
									 * markup declaration open state.
									 */
									//state = Transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
							        state = TokenizerState.MARKUP_DECLARATION_OPEN;
									goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the close tag
									 * open state.
									 */
									//state = Transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
                                    state = TokenizerState.CLOSE_TAG_OPEN;
									goto continueStateloop;
								case '?':
									/*
									 * U+003F QUESTION MARK (?) Parse error.
									 */
									ErrProcessingInstruction();
									/*
									 * Switch to the bogus comment state.
									 */
									ClearLongStrBufAndAppend(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrLtGt();
									/*
									 * Emit a U+003C LESS-THAN SIGN character token
									 * and a U+003E GREATER-THAN SIGN character
									 * token.
									 */
									TokenHandler.Characters(LT_GT, 0, 2);
									/* Switch to the data state. */
									cstart = pos + 1;
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									/*
									 * Anything else Parse error.
									 */
									ErrBadCharAfterLt(c);
									/*
									 * Emit a U+003C LESS-THAN SIGN character token
									 */
									TokenHandler.Characters(LT_GT, 0, 1);
									/*
									 * and reconsume the current input character in
									 * the data state.
									 */
									cstart = pos;
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakTagopenloop:
						goto case TokenizerState.TAG_NAME;
					// FALL THROUGH DON'T REORDER
					case TokenizerState.TAG_NAME:
						/*tagnameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									StrBufToElementNameString();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
									state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before attribute name state.
									 */
									StrBufToElementNameString();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakTagnameloop;
								// goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									StrBufToElementNameString();
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									StrBufToElementNameString();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto default;
								// fall thru
								default:
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Append the
										 * lowercase version of the current input
										 * character (add 0x0020 to the character's
										 * code point) to the current tag token's
										 * tag name.
										 */
										c += (char)0x20;
									}
									/*
									 * Anything else Append the current input
									 * character to the current tag token's tag
									 * name.
									 */
									AppendStrBuf(c);
									/*
									 * Stay in the tag name state.
									 */
									continue;
							}
						}
					breakTagnameloop:
						goto case TokenizerState.BEFORE_ATTRIBUTE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_ATTRIBUTE_NAME:
						/*beforeattributenameloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before attribute name state.
									 */
									continue;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;

									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto case '\"';
								case '\"':
								case '\'':
								case '<':
								case '=':
									/*
									 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
									 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
									 * SIGN (=) Parse error.
									 */
									ErrBadCharBeforeAttributeNameOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									/*
									 * Anything else Start a new attribute in the
									 * current tag token.
									 */
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Set that
										 * attribute's name to the lowercase version
										 * of the current input character (add
										 * 0x0020 to the character's code point)
										 */
										c += (char)0x20;
									}
									/*
									 * Set that attribute's name to the current
									 * input character,
									 */
									ClearStrBufAndAppend(c);
									/*
									 * and its value to the empty string.
									 */
									// Will do later.
									/*
									 * Switch to the attribute name state.
									 */
									//state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_NAME;
									goto breakBeforeattributenameloop;
								// goto continueStateloop;
							}
						}
					breakBeforeattributenameloop:
						goto case TokenizerState.ATTRIBUTE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.ATTRIBUTE_NAME:
						/*attributenameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									AttributeNameComplete();
									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the after attribute name state.
									 */
									AttributeNameComplete();
									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_NAME;
                                    goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									AttributeNameComplete();
									AddAttributeWithoutValue();
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto continueStateloop;
								case '=':
									/*
									 * U+003D EQUALS SIGN (=) Switch to the before
									 * attribute value state.
									 */
									AttributeNameComplete();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_VALUE;
									goto breakAttributenameloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									AttributeNameComplete();
									AddAttributeWithoutValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto case '\"';
								case '\"':
								case '\'':
								case '<':
									/*
									 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
									 * (') U+003C LESS-THAN SIGN (<) Parse error.
									 */
									ErrQuoteOrLtInAttributeNameOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Append the
										 * lowercase version of the current input
										 * character (add 0x0020 to the character's
										 * code point) to the current attribute's
										 * name.
										 */
										c += (char)0x20;
									}
									/*
									 * Anything else Append the current input
									 * character to the current attribute's name.
									 */
									AppendStrBuf(c);
									/*
									 * Stay in the attribute name state.
									 */
									continue;
							}
						}
					breakAttributenameloop:
						goto case TokenizerState.BEFORE_ATTRIBUTE_VALUE;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_ATTRIBUTE_VALUE:
						/*beforeattributevalueloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before attribute value state.
									 */
									continue;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the
									 * attribute value (double-quoted) state.
									 */
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_DOUBLE_QUOTED;

									goto breakBeforeattributevalueloop;
								// goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the attribute
									 * value (unquoted) state and reconsume this
									 * input character.
									 */
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_UNQUOTED;
									NoteUnquotedAttributeValue();
									reconsume = true;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the attribute
									 * value (single-quoted) state.
									 */
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrAttributeValueMissing();
									/*
									 * Emit the current tag token.
									 */
									AddAttributeWithoutValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto case '<';
								case '<':
								case '=':
								case '`':
									/*
									 * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
									 * (=) U+0060 GRAVE ACCENT (`)
									 */
									ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									// [NOCPP[
									ErrHtml4NonNameInUnquotedAttribute(c);
									// ]NOCPP]
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									ClearLongStrBufAndAppend(c);
									/*
									 * Switch to the attribute value (unquoted)
									 * state.
									 */

									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_UNQUOTED;

									NoteUnquotedAttributeValue();
									goto continueStateloop;
							}
						}
					breakBeforeattributevalueloop:
						goto case TokenizerState.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.ATTRIBUTE_VALUE_DOUBLE_QUOTED:
						/*attributevaluedoublequotedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the after
									 * attribute value (quoted) state.
									 */
									AddAttributeWithValue();

									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED;
									goto breakAttributevaluedoublequotedloop;
								// goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in attribute value state, with the
									 * additional allowed character being U+0022
									 * QUOTATION MARK (").
									 */
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('\"');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the attribute value (double-quoted)
									 * state.
									 */
									continue;
							}
						}
					breakAttributevaluedoublequotedloop:
						goto case TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED:
						/*afterattributevaluequotedloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before attribute name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto breakAfterattributevaluequotedloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								default:
									/*
									 * Anything else Parse error.
									 */
									ErrNoSpaceBetweenAttributes();
									/*
									 * Reconsume the character in the before
									 * attribute name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakAfterattributevaluequotedloop:
						goto case TokenizerState.SELF_CLOSING_START_TAG;
					// FALLTHRU DON'T REORDER
					case TokenizerState.SELF_CLOSING_START_TAG:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
                        c = buf[pos];
						/*
						 * Consume the next input character:
						 */
						switch (c)
						{
							case '>':
								/*
								 * U+003E GREATER-THAN SIGN (>) Set the self-closing
								 * flag of the current tag token. Emit the current
								 * tag token.
								 */
								// [NOCPP[
								ErrHtml4XmlVoidSyntax();
								// ]NOCPP]
								//state = Transition(state, EmitCurrentTagToken(true, pos), reconsume, pos);
						        state = EmitCurrentTagToken(true, pos);
								if (shouldSuspend)
								{
									goto breakStateloop;
								}
								/*
								 * Switch to the data state.
								 */
								goto continueStateloop;
							default:
								/* Anything else Parse error. */
								ErrSlashNotFollowedByGt();
								/*
								 * Reconsume the character in the before attribute
								 * name state.
								 */
								//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
						        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;

								reconsume = true;
								goto continueStateloop;
						}
					// XXX reorder point
					case TokenizerState.ATTRIBUTE_VALUE_UNQUOTED:
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									AddAttributeWithValue();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before attribute name state.
									 */
									AddAttributeWithValue();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in attribute value state, with the
									 * additional allowed character being U+003E
									 * GREATER-THAN SIGN (>)
									 */
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('>');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									AddAttributeWithValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto case '<';
								// fall thru
								case '<':
								case '\"':
								case '\'':
								case '=':
								case '`':
									/*
									 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
									 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
									 * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
									 */
									ErrUnquotedAttributeValOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									// fall through
									goto default;
								default:
									// [NOCPP]
									ErrHtml4NonNameInUnquotedAttribute(c);
									// ]NOCPP]
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the attribute value (unquoted) state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.AFTER_ATTRIBUTE_NAME:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the after attribute name state.
									 */
									continue;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									AddAttributeWithoutValue();
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto continueStateloop;
								case '=':
									/*
									 * U+003D EQUALS SIGN (=) Switch to the before
									 * attribute value state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_VALUE;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									AddAttributeWithoutValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto case '\"';
								// fall thru
								case '\"':
								case '\'':
								case '<':
									ErrQuoteOrLtInAttributeNameOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									AddAttributeWithoutValue();
									/*
									 * Anything else Start a new attribute in the
									 * current tag token.
									 */
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Set that
										 * attribute's name to the lowercase version
										 * of the current input character (add
										 * 0x0020 to the character's code point)
										 */
										c += (char)0x20;
									}
									/*
									 * Set that attribute's name to the current
									 * input character,
									 */
									ClearStrBufAndAppend(c);
									/*
									 * and its value to the empty string.
									 */
									// Will do later.
									/*
									 * Switch to the attribute name state.
									 */
									//state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_NAME;
									goto continueStateloop;
							}
						}
					// XXX reorder point
					case TokenizerState.MARKUP_DECLARATION_OPEN:
						/*markupdeclarationopenloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * If the next two characters are both U+002D
							 * HYPHEN-MINUS characters (-), consume those two
							 * characters, create a comment token whose data is the
							 * empty string, and switch to the comment start state.
							 * 
							 * Otherwise, if the next seven characters are an ASCII
							 * case-insensitive match for the word "DOCTYPE", then
							 * consume those characters and switch to the DOCTYPE
							 * state.
							 * 
							 * Otherwise, if the insertion mode is
							 * "in foreign content" and the current node is not an
							 * element in the HTML namespace and the next seven
							 * characters are a case-sensitive match for the string
							 * "[CDATA[" (the five uppercase letters "CDATA" with a
							 * U+005B LEFT SQUARE BRACKET character before and
							 * after), then consume those characters and switch to
							 * the CDATA section state.
							 * 
							 * Otherwise, it is a parse error. Switch to the bogus
							 * comment state. The next character that is consumed,
							 * if any, is the first character that will be in the
							 * comment.
							 */
							switch (c)
							{
								case '-':
									ClearLongStrBufAndAppend(c);
									//state = Transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
							        state = TokenizerState.MARKUP_DECLARATION_HYPHEN;
									goto breakMarkupdeclarationopenloop;
								// goto continueStateloop;
								case 'd':
								case 'D':
									ClearLongStrBufAndAppend(c);
									index = 0;
									//state = Transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
							        state = TokenizerState.MARKUP_DECLARATION_OCTYPE;
									goto continueStateloop;
								case '[':
									if (TokenHandler.IsCDataSectionAllowed)
									{
										ClearLongStrBufAndAppend(c);
										index = 0;
										//state = Transition(state, Tokenizer.CDATA_START, reconsume, pos);
									    state = TokenizerState.CDATA_START;
										goto continueStateloop;
									}
									else
									{
										// else fall through
										goto default;
									}
								default:
									ErrBogusComment();
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakMarkupdeclarationopenloop:
						goto case TokenizerState.MARKUP_DECLARATION_HYPHEN;
					// FALLTHRU DON'T REORDER
					case TokenizerState.MARKUP_DECLARATION_HYPHEN:
						/*markupdeclarationhyphenloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							switch (c)
							{
								case '\u0000':
									goto breakStateloop;
								case '-':
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.COMMENT_START, reconsume, pos);
							        state = TokenizerState.COMMENT_START;
									goto breakMarkupdeclarationhyphenloop;
								// goto continueStateloop;
								default:
									ErrBogusComment();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakMarkupdeclarationhyphenloop:
						goto case TokenizerState.COMMENT_START;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT_START:
						/*commentstartloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment start state
							 * 
							 * 
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Switch to the comment
									 * start dash state.
									 */
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
							        state = TokenizerState.COMMENT_START_DASH;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrPrematureEndOfComment();
									/* Emit the comment token. */
									EmitComment(0, pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;

									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									// state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;

									goto breakCommentstartloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the input character to
									 * the comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;

									goto breakCommentstartloop;
								// goto continueStateloop;
							}
						}
					breakCommentstartloop:
						goto case TokenizerState.COMMENT;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT:
						/*commentloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment state Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Switch to the comment
									 * end dash state
									 */
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
							        state = TokenizerState.COMMENT_END_DASH;
									goto breakCommentloop;
								// goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the input character to
									 * the comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the comment state.
									 */
									continue;
							}
						}
					breakCommentloop:
						goto case TokenizerState.COMMENT_END_DASH;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT_END_DASH:
						/*commentenddashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment end dash state Consume the next input
							 * character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Switch to the comment
									 * end state
									 */
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
							        state = TokenizerState.COMMENT_END;
									goto breakCommentenddashloop;
								// goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto default;
								// fall thru
								default:
									/*
									 * Anything else Append a U+002D HYPHEN-MINUS
									 * (-) character and the input character to the
									 * comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
							}
						}
					breakCommentenddashloop:
						goto case TokenizerState.COMMENT_END;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT_END:
						/*commentendloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment end state Consume the next input
							 * character:
							 */
							switch (c)
							{
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the comment
									 * token.
									 */
									EmitComment(2, pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									/* U+002D HYPHEN-MINUS (-) Parse error. */
									/*
									 * Append a U+002D HYPHEN-MINUS (-) character to
									 * the comment token's data.
									 */
									AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
									/*
									 * Stay in the comment end state.
									 */
									continue;
								case '\r':
									AdjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto breakStateloop;
								case '\n':
									AdjustDoubleHyphenAndAppendToLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
								case '!':
									ErrHyphenHyphenBang();
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
							        state = TokenizerState.COMMENT_END_BANG;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Append two U+002D HYPHEN-MINUS (-) characters
									 * and the input character to the comment
									 * token's data.
									 */
									AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
							}
						}
					// XXX reorder point
					case TokenizerState.COMMENT_END_BANG:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment end bang state
							 * 
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the comment
									 * token.
									 */
									EmitComment(3, pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									/*
									 * Append two U+002D HYPHEN-MINUS (-) characters
									 * and a U+0021 EXCLAMATION MARK (!) character
									 * to the comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment end dash state.
									 */
									//state = Transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                                    state = TokenizerState.COMMENT_END_DASH;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append two U+002D HYPHEN-MINUS
									 * (-) characters, a U+0021 EXCLAMATION MARK (!)
									 * character, and the input character to the
									 * comment token's data. Switch to the comment
									 * state.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
							}
						}
					// XXX reorder point
					case TokenizerState.COMMENT_START_DASH:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
                        c = buf[pos];
						/*
						 * Comment start dash state
						 * 
						 * Consume the next input character:
						 */
						switch (c)
						{
							case '-':
								/*
								 * U+002D HYPHEN-MINUS (-) Switch to the comment end
								 * state
								 */
								AppendLongStrBuf(c);
								//state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                                state = TokenizerState.COMMENT_END;
								goto continueStateloop;
							case '>':
								ErrPrematureEndOfComment();
								/* Emit the comment token. */
								EmitComment(1, pos);
								/*
								 * Switch to the data state.
								 */
								//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                state = TokenizerState.DATA;
								goto continueStateloop;
							case '\r':
								AppendLongStrBufCarriageReturn();
								//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                state = TokenizerState.COMMENT;
								goto breakStateloop;
							case '\n':
								AppendLongStrBufLineFeed();
								//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                state = TokenizerState.COMMENT;
								goto continueStateloop;
							case '\u0000':
								c = '\uFFFD';
								// fall thru
								goto default;
							default:
								/*
								 * Append a U+002D HYPHEN-MINUS character (-) and
								 * the current input character to the comment
								 * token's data.
								 */
								AppendLongStrBuf(c);
								/*
								 * Switch to the comment state.
								 */
								//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                state = TokenizerState.COMMENT;
								goto continueStateloop;
						}
					// XXX reorder point
					case TokenizerState.CDATA_START:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							if (index < 6)
							{ // CDATA_LSQB.Length
								if (c == Tokenizer.CDATA_LSQB[index])
								{
									AppendLongStrBuf(c);
								}
								else
								{
									ErrBogusComment();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							else
							{
								cstart = pos; // start coalescing
								//state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                state = TokenizerState.CDATA_SECTION;
								reconsume = true;
								break; // FALL THROUGH goto continueStateloop;
							}
						}
						goto case TokenizerState.CDATA_SECTION;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CDATA_SECTION:
						/*cdatasectionloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							switch (c)
							{
								case ']':
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
							        state = TokenizerState.CDATA_RSQB;
									goto breakCdatasectionloop; // FALL THROUGH
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								// fall thru
								default:
									continue;
							}
						}
					breakCdatasectionloop:
						goto case TokenizerState.CDATA_RSQB;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CDATA_RSQB:
						/*cdatarsqb:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							switch (c)
							{
								case ']':
									//state = Transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
							        state = TokenizerState.CDATA_RSQB_RSQB;

									goto breakCdatarsqb;
								default:
									TokenHandler.Characters(Tokenizer.RSQB_RSQB, 0, 1);
									cstart = pos;
									//state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
							        state = TokenizerState.CDATA_SECTION;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakCdatarsqb:
						goto case TokenizerState.CDATA_RSQB_RSQB;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CDATA_RSQB_RSQB:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
                        c = buf[pos];
						switch (c)
						{
							case '>':
								cstart = pos + 1;
								//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                state = TokenizerState.DATA;
								goto continueStateloop;
							default:
								TokenHandler.Characters(Tokenizer.RSQB_RSQB, 0, 2);
								cstart = pos;
								//state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
						        state = TokenizerState.CDATA_SECTION;

								reconsume = true;
								goto continueStateloop;

						}
					// XXX reorder point
					case TokenizerState.ATTRIBUTE_VALUE_SINGLE_QUOTED:
						/*attributevaluesinglequotedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the after
									 * attribute value (quoted) state.
									 */
									AddAttributeWithValue();

									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED;
									goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in attribute value state, with the
									 * + additional allowed character being U+0027
									 * APOSTROPHE (').
									 */
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('\'');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
									goto breakAttributevaluesinglequotedloop;
								// goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									goto default;
								// fall thru
								default:
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the attribute value (single-quoted)
									 * state.
									 */
									continue;
							}
						}
					breakAttributevaluesinglequotedloop:
						goto case TokenizerState.CONSUME_CHARACTER_REFERENCE;
					// FALLTHRU DON'T REORDER
					case TokenizerState.CONSUME_CHARACTER_REFERENCE:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
						c = buf[pos];
						if (c == '\u0000')
						{
							goto breakStateloop;
						}
						/*
						 * Unlike the definition in the spec, this state does not
						 * return a value and never requires the caller to
						 * backtrack. This state takes care of emitting characters
						 * or appending to the current attribute value. It also
						 * takes care of that in the case when consuming the
						 * character reference fails.
						 */
						/*
						 * This section defines how to consume a character
						 * reference. This definition is used when parsing character
						 * references in text and in attributes.
						 * 
						 * The behavior depends on the identity of the next
						 * character (the one immediately after the U+0026 AMPERSAND
						 * character):
						 */
						switch (c)
						{
							case ' ':
							case '\t':
							case '\n':
							case '\r': // we'll reconsume!
							case '\u000C':
							case '<':
							case '&':
								EmitOrAppendStrBuf(returnState);
								//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
								{
									cstart = pos;
								}
								//state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
								reconsume = true;
								goto continueStateloop;
							case '#':
								/*
								 * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
								 * SIGN.
								 */
								AppendStrBuf('#');
								//state = Transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
						        state = TokenizerState.CONSUME_NCR;
								goto continueStateloop;
							default:
								if (c == additional)
								{
									EmitOrAppendStrBuf(returnState);
									//state = Transition(state, returnState, reconsume, pos);
									state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								if (c >= 'a' && c <= 'z')
								{
									firstCharKey = c - 'a' + 26;
								}
								else if (c >= 'A' && c <= 'Z')
								{
									firstCharKey = c - 'A';
								}
								else
								{
									// No match
									/*
									 * If no match can be made, then this is a parse
									 * error.
									 */
									ErrNoNamedCharacterMatch();
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
								    if (((byte)returnState & DATA_AND_RCDATA_MASK) !=0)
									{
										cstart = pos;
									}
									//state = Transition(state, returnState, reconsume, pos);
									state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								// Didn't fail yet
								AppendStrBuf(c);
								//state = Transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
						        state = TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;

								// FALL THROUGH goto continueStateloop;
								break;
						}
						goto case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP:
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (c == '\u0000')
							{
								goto breakStateloop;
							}
							/*
							 * The data structure is as follows:
							 * 
							 * HILO_ACCEL is a two-dimensional int array whose major
							 * index corresponds to the second character of the
							 * character reference (code point as index) and the
							 * minor index corresponds to the first character of the
							 * character reference (packed so that A-Z runs from 0
							 * to 25 and a-z runs from 26 to 51). This layout makes
							 * it easier to use the sparseness of the data structure
							 * to omit parts of it: The second dimension of the
							 * table is null when no character reference starts with
							 * the character corresponding to that row.
							 * 
							 * The int value HILO_ACCEL (by these indices) is zero
							 * if there exists no character reference starting with
							 * that two-letter prefix. Otherwise, the value is an
							 * int that packs two shorts so that the higher short is
							 * the index of the highest character reference name
							 * with that prefix in NAMES and the lower short
							 * corresponds to the index of the lowest character
							 * reference name with that prefix. (It happens that the
							 * first two character reference names share their
							 * prefix so the packed int cannot be 0 by packing the
							 * two shorts.)
							 * 
							 * NAMES is an array of byte arrays where each byte
							 * array encodes the name of a character reference as
							 * ASCII. The names omit the first two letters of the
							 * name. (Since storing the first two letters would be
							 * redundant with the data contained in HILO_ACCEL.) The
							 * entries are lexically sorted.
							 * 
							 * For a given index in NAMES, the same index in VALUES
							 * contains the corresponding expansion as an array of
							 * two UTF-16 code units (either the character and
							 * U+0000 or a surrogate pair).
							 */
							int hilo = 0;
							if (c <= 'z')
							{
								int[] row = NamedCharactersAccel.HILO_ACCEL[c];
								if (row != null)
								{
									hilo = row[firstCharKey];
								}
							}
							if (hilo == 0)
							{
								/*
								 * If no match can be made, then this is a parse
								 * error.
								 */
								ErrNoNamedCharacterMatch();
								EmitOrAppendStrBuf(returnState);
								//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
								{
									cstart = pos;
								}
								//state = Transition(state, returnState, reconsume, pos);
								state = returnState;
								reconsume = true;
								goto continueStateloop;
							}
							// Didn't fail yet
							AppendStrBuf(c);
							lo = hilo & 0xFFFF;
							hi = hilo >> 16;
							entCol = -1;
							candidate = -1;
							strBufMark = 0;
							//state = Transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
						    state = TokenizerState.CHARACTER_REFERENCE_TAIL;
							// FALL THROUGH goto continueStateloop;
							goto case TokenizerState.CHARACTER_REFERENCE_TAIL;
						}
					case TokenizerState.CHARACTER_REFERENCE_TAIL:
						/*outer:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (c == '\u0000')
							{
								goto breakStateloop;
							}
							entCol++;
							/*
							 * Consume the maximum number of characters possible,
							 * with the consumed characters matching one of the
							 * identifiers in the first column of the named
							 * character references table (in a case-sensitive
							 * manner).
							 */
							/*loloop:*/
							for (; ; )
							{
								if (hi < lo)
								{
									goto breakOuter;
								}
								if (entCol == NamedCharacters.NAMES[lo].Length)
								{
									candidate = lo;
									strBufMark = strBufLen;
									lo++;
								}
								else if (entCol > NamedCharacters.NAMES[lo].Length)
								{
									goto breakOuter;
								}
								else if (c > NamedCharacters.NAMES[lo][entCol])
								{
									lo++;
								}
								else
								{
									goto breakLoloop;
								}
							}

						breakLoloop:

							/*hiloop:*/
							for (; ; )
							{
								if (hi < lo)
								{
									goto breakOuter;
								}
								if (entCol == NamedCharacters.NAMES[hi].Length)
								{
									goto breakHiloop;
								}
								if (entCol > NamedCharacters.NAMES[hi].Length)
								{
									goto breakOuter;
								}
								else if (c < NamedCharacters.NAMES[hi][entCol])
								{
									hi--;
								}
								else
								{
									goto breakHiloop;
								}
							}

						breakHiloop:

							if (hi < lo)
							{
								goto breakOuter;
							}
							AppendStrBuf(c);
							continue;
						}

					breakOuter:

						if (candidate == -1)
						{
							// reconsume deals with CR, LF or nul
							/*
							 * If no match can be made, then this is a parse error.
							 */
							ErrNoNamedCharacterMatch();
							EmitOrAppendStrBuf(returnState);
							//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                            if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
							{
								cstart = pos;
							}
							//state = Transition(state, returnState, reconsume, pos);
							state = returnState;
							reconsume = true;
							goto continueStateloop;
						}
						else
						{
							// c can't be CR, LF or nul if we got here
							string candidateName = NamedCharacters.NAMES[candidate];
							if (candidateName.Length == 0
									|| candidateName[candidateName.Length - 1] != ';')
							{
								/*
								 * If the last character matched is not a U+003B
								 * SEMICOLON (;), there is a parse error.
								 */
								//if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
								{
									/*
									 * If the entity is being consumed as part of an
									 * attribute, and the last character matched is
									 * not a U+003B SEMICOLON (;),
									 */
									char ch;
									if (strBufMark == strBufLen)
									{
										ch = c;
									}
									else
									{
										// if (strBufOffset != -1) {
										// ch = buf[strBufOffset + strBufMark];
										// } else {
										ch = strBuf[strBufMark];
										// }
									}
									if (ch == '=' || (ch >= '0' && ch <= '9')
											|| (ch >= 'A' && ch <= 'Z')
											|| (ch >= 'a' && ch <= 'z'))
									{
										/*
										 * and the next character is either a U+003D
										 * EQUALS SIGN character (=) or in the range
										 * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
										 * U+0041 LATIN CAPITAL LETTER A to U+005A
										 * LATIN CAPITAL LETTER Z, or U+0061 LATIN
										 * SMALL LETTER A to U+007A LATIN SMALL
										 * LETTER Z, then, for historical reasons,
										 * all the characters that were matched
										 * after the U+0026 AMPERSAND (&) must be
										 * unconsumed, and nothing is returned.
										 */
										ErrNoNamedCharacterMatch();
										AppendStrBufToLongStrBuf();
										//state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
										reconsume = true;
										goto continueStateloop;
									}
								}
								//if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
								{
									ErrUnescapedAmpersandInterpretedAsCharacterReference();
								}
								else
								{
									ErrNotSemicolonTerminated();
								}
							}

							/*
							 * Otherwise, return a character token for the character
							 * corresponding to the entity name (as given by the
							 * second column of the named character references
							 * table).
							 */

							char[] val = NamedCharacters.VALUES[candidate];
							if (
								// [NOCPP[
							val.Length == 1
								// ]NOCPP]
								// CPPONLY: val[1] == 0
							)
							{
								EmitOrAppendOne(val, returnState);
							}
							else
							{
								EmitOrAppendTwo(val, returnState);
							}
							// this is so complicated!
							if (strBufMark < strBufLen)
							{
								// if (strBufOffset != -1) {
								// if ((returnState & (~1)) != 0) {
								// for (int i = strBufMark; i < strBufLen; i++) {
								// appendLongStrBuf(buf[strBufOffset + i]);
								// }
								// } else {
								// tokenHandler.Characters(buf, strBufOffset
								// + strBufMark, strBufLen
								// - strBufMark);
								// }
								// } else {
								//if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
								{
									for (int i = strBufMark; i < strBufLen; i++)
									{
										AppendLongStrBuf(strBuf[i]);
									}
								}
								else
								{
									TokenHandler.Characters(strBuf, strBufMark,
											strBufLen - strBufMark);
								}
								// }
							}
							//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                            if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
							{
								cstart = pos;
							}
							//state = Transition(state, returnState, reconsume, pos);
                            state = returnState;
							reconsume = true;
							goto continueStateloop;
							/*
							 * If the markup contains I'm &notit; I tell you, the
							 * entity is parsed as "not", as in, I'm ¬it; I tell
							 * you. But if the markup was I'm &notin; I tell you,
							 * the entity would be parsed as "notin;", resulting in
							 * I'm ∉ I tell you.
							 */
						}
					// XXX reorder point
					case TokenizerState.CONSUME_NCR:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
						c = buf[pos];
						prevValue = -1;
						value = 0;
						seenDigits = false;
						/*
						 * The behavior further depends on the character after the
						 * U+0023 NUMBER SIGN:
						 */
						switch (c)
						{
							case 'x':
							case 'X':

								/*
								 * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
								 * LETTER X Consume the X.
								 * 
								 * Follow the steps below, but using the range of
								 * characters U+0030 DIGIT ZERO through to U+0039
								 * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
								 * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
								 * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
								 * LETTER F (in other words, 0-9, A-F, a-f).
								 * 
								 * When it comes to interpreting the number,
								 * interpret it as a hexadecimal number.
								 */
								AppendStrBuf(c);
								//state = Transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
						        state = TokenizerState.HEX_NCR_LOOP;

								goto continueStateloop;
							default:
								/*
								 * Anything else Follow the steps below, but using
								 * the range of characters U+0030 DIGIT ZERO through
								 * to U+0039 DIGIT NINE (i.e. just 0-9).
								 * 
								 * When it comes to interpreting the number,
								 * interpret it as a decimal number.
								 */
								//state = Transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
						        state = TokenizerState.DECIMAL_NRC_LOOP;
								reconsume = true;
								// FALL THROUGH goto continueStateloop;
								break;
						}
						// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
						goto case TokenizerState.DECIMAL_NRC_LOOP;
					case TokenizerState.DECIMAL_NRC_LOOP:
						/*decimalloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							// Deal with overflow gracefully
							if (value < prevValue)
							{
								value = 0x110000; // Value above Unicode range but
								// within int
								// range
							}
							prevValue = value;
							/*
							 * Consume as many characters as match the range of
							 * characters given above.
							 */
							if (c >= '0' && c <= '9')
							{
								seenDigits = true;
								value *= 10;
								value += c - '0';
								continue;
							}
							else if (c == ';')
							{
								if (seenDigits)
								{
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos + 1;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
								    state = TokenizerState.HANDLE_NCR_VALUE;

									// FALL THROUGH goto continueStateloop;
									goto breakDecimalloop;
								}
								else
								{
									ErrNoDigitsInNCR();
									AppendStrBuf(';');
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                    {
										cstart = pos + 1;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;

									goto continueStateloop;
								}
							}
							else
							{
								/*
								 * If no characters match the range, then don't
								 * consume any characters (and unconsume the U+0023
								 * NUMBER SIGN character and, if appropriate, the X
								 * character). This is a parse error; nothing is
								 * returned.
								 * 
								 * Otherwise, if the next character is a U+003B
								 * SEMICOLON, consume that too. If it isn't, there
								 * is a parse error.
								 */
								if (!seenDigits)
								{
									ErrNoDigitsInNCR();
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								else
								{
									ErrCharRefLacksSemicolon();
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                    {
										cstart = pos;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
								    state = TokenizerState.HANDLE_NCR_VALUE;
									reconsume = true;
									// FALL THROUGH goto continueStateloop;
									goto breakDecimalloop;
								}
							}
						}
					breakDecimalloop:
						goto case TokenizerState.HANDLE_NCR_VALUE;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.HANDLE_NCR_VALUE:
						// WARNING previous state sets reconsume
						// XXX inline this case if the method size can take it
						HandleNcrValue(returnState);
						//state = Transition(state, returnState, reconsume, pos);
						state = returnState;

						goto continueStateloop;
					// XXX reorder point
					case TokenizerState.HEX_NCR_LOOP:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							// Deal with overflow gracefully
							if (value < prevValue)
							{
								value = 0x110000; // Value above Unicode range but
								// within int
								// range
							}
							prevValue = value;
							/*
							 * Consume as many characters as match the range of
							 * characters given above.
							 */
							if (c >= '0' && c <= '9')
							{
								seenDigits = true;
								value *= 16;
								value += c - '0';
								continue;
							}
							else if (c >= 'A' && c <= 'F')
							{
								seenDigits = true;
								value *= 16;
								value += c - 'A' + 10;
								continue;
							}
							else if (c >= 'a' && c <= 'f')
							{
								seenDigits = true;
								value *= 16;
								value += c - 'a' + 10;
								continue;
							}
							else if (c == ';')
							{
								if (seenDigits)
								{
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos + 1;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
								    state = TokenizerState.HANDLE_NCR_VALUE;
									goto continueStateloop;
								}
								else
								{
									ErrNoDigitsInNCR();
									AppendStrBuf(';');
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos + 1;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
									goto continueStateloop;
								}
							}
							else
							{
								/*
								 * If no characters match the range, then don't
								 * consume any characters (and unconsume the U+0023
								 * NUMBER SIGN character and, if appropriate, the X
								 * character). This is a parse error; nothing is
								 * returned.
								 * 
								 * Otherwise, if the next character is a U+003B
								 * SEMICOLON, consume that too. If it isn't, there
								 * is a parse error.
								 */
								if (!seenDigits)
								{
									ErrNoDigitsInNCR();
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								else
								{
									ErrCharRefLacksSemicolon();
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                    state = TokenizerState.HANDLE_NCR_VALUE;
									reconsume = true;
									goto continueStateloop;
								}
							}
						}
					// XXX reorder point
					case TokenizerState.PLAINTEXT:
						/*plaintextloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '\u0000':
									EmitPlaintextReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * PLAINTEXT state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.CLOSE_TAG_OPEN:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
						c = buf[pos];
						/*
						 * Otherwise, if the content model flag is set to the PCDATA
						 * state, or if the next few characters do match that tag
						 * name, consume the next input character:
						 */
						switch (c)
						{
							case '>':
								/* U+003E GREATER-THAN SIGN (>) Parse error. */
								ErrLtSlashGt();
								/*
								 * Switch to the data state.
								 */
								cstart = pos + 1;
								//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                state = TokenizerState.DATA;
								goto continueStateloop;
							case '\r':
								SilentCarriageReturn();
								/* Anything else Parse error. */
								ErrGarbageAfterLtSlash();
								/*
								 * Switch to the bogus comment state.
								 */
								ClearLongStrBufAndAppend('\n');
								//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                state = TokenizerState.BOGUS_COMMENT;
								goto breakStateloop;
							case '\n':
								SilentLineFeed();
								/* Anything else Parse error. */
								ErrGarbageAfterLtSlash();
								/*
								 * Switch to the bogus comment state.
								 */
								ClearLongStrBufAndAppend('\n');
								//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                state = TokenizerState.BOGUS_COMMENT;
								goto continueStateloop;
							case '\u0000':
								c = '\uFFFD';
								// fall thru
								goto default;
							default:
								if (c >= 'A' && c <= 'Z')
								{
									c += (char)0x20;
								}
								if (c >= 'a' && c <= 'z')
								{
									/*
									 * U+0061 LATIN SMALL LETTER A through to U+007A
									 * LATIN SMALL LETTER Z Create a new end tag
									 * token,
									 */
									endTag = true;
									/*
									 * set its tag name to the input character,
									 */
									ClearStrBufAndAppend(c);
									/*
									 * then switch to the tag name state. (Don't
									 * emit the token yet; further details will be
									 * filled in before it is emitted.)
									 */
									//state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
									state = TokenizerState.TAG_NAME;
									goto continueStateloop;
								}
								else
								{
									/* Anything else Parse error. */
									ErrGarbageAfterLtSlash();
									/*
									 * Switch to the bogus comment state.
									 */
									ClearLongStrBufAndAppend(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
								    state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
								}
						}
					// XXX reorder point
					case TokenizerState.RCDATA:
						/*rcdataloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in RCDATA state.
									 */
									FlushChars(buf, pos);
									ClearStrBufAndAppend(c);
									additional = '\u0000';
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
									goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * RCDATA less-than sign state.
									 */
									FlushChars(buf, pos);

									returnState = state;
									//state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Emit the current input character as a
									 * character token. Stay in the RCDATA state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.RAWTEXT:
						/*rawtextloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * RAWTEXT less-than sign state.
									 */
									FlushChars(buf, pos);

									returnState = state;
									//state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN;
									goto breakRawtextloop;
								// FALL THRU goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Emit the current input character as a
									 * character token. Stay in the RAWTEXT state.
									 */
									continue;
							}
						}
					breakRawtextloop:
						goto case TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN;
					// XXX fallthru don't reorder
					case TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN:
						/*rawtextrcdatalessthansignloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Set the temporary buffer
									 * to the empty string. Switch to the non-data
									 * end tag name state.
									 */
									index = 0;
									ClearStrBuf();
									//state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
							        state = TokenizerState.NON_DATA_END_TAG_NAME;
									goto breakRawtextrcdatalessthansignloop;
								// FALL THRU goto continueStateloop;
								default:
									/*
									 * Otherwise, emit a U+003C LESS-THAN SIGN
									 * character token
									 */
									TokenHandler.Characters(Tokenizer.LT_GT, 0, 1);
									/*
									 * and reconsume the current input character in
									 * the data state.
									 */
									cstart = pos;
									//state = Transition(state, returnState, reconsume, pos);
									state = returnState;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakRawtextrcdatalessthansignloop:
						goto case TokenizerState.NON_DATA_END_TAG_NAME;
					// XXX fall thru. don't reorder.
					case TokenizerState.NON_DATA_END_TAG_NAME:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * ASSERT! when entering this state, set index to 0 and
							 * call clearStrBuf() assert (contentModelElement !=
							 * null); Let's implement the above without lookahead.
							 * strBuf is the 'temporary buffer'.
							 */
							if (index < endTagExpectationAsArray.Length)
							{
								char e = endTagExpectationAsArray[index];
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != e)
								{
									// [NOCPP[
									ErrHtml4LtSlashInRcdata(folded);
									// ]NOCPP]
									TokenHandler.Characters(Tokenizer.LT_SOLIDUS,
											0, 2);
									EmitStrBuf();
									cstart = pos;
									//state = Transition(state, returnState, reconsume, pos);
								    state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								AppendStrBuf(c);
								index++;
								continue;
							}
							else
							{
								endTag = true;
								// XXX replace contentModelElement with different
								// type
								tagName = endTagExpectation;
								switch (c)
								{
									case '\r':
										SilentCarriageReturn();
										//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
								        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;

										goto breakStateloop;
									case '\n':
										SilentLineFeed();
										goto case ' ';
									// fall thru
									case ' ':
									case '\t':
									case '\u000C':
										/*
										 * U+0009 CHARACTER TABULATION U+000A LINE
										 * FEED (LF) U+000C FORM FEED (FF) U+0020
										 * SPACE If the current end tag token is an
										 * appropriate end tag token, then switch to
										 * the before attribute name state.
										 */
										//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
								        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
										goto continueStateloop;
									case '/':
										/*
										 * U+002F SOLIDUS (/) If the current end tag
										 * token is an appropriate end tag token,
										 * then switch to the self-closing start tag
										 * state.
										 */
										//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
								        state = TokenizerState.SELF_CLOSING_START_TAG;
										goto continueStateloop;
									case '>':
										/*
										 * U+003E GREATER-THAN SIGN (>) If the
										 * current end tag token is an appropriate
										 * end tag token, then emit the current tag
										 * token and switch to the data state.
										 */
										//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
								        state = EmitCurrentTagToken(false, pos);
										if (shouldSuspend)
										{
											goto breakStateloop;
										}
										goto continueStateloop;
									default:
										/*
										 * Emit a U+003C LESS-THAN SIGN character
										 * token, a U+002F SOLIDUS character token,
										 * a character token for each of the
										 * characters in the temporary buffer (in
										 * the order they were added to the buffer),
										 * and reconsume the current input character
										 * in the RAWTEXT state.
										 */
										// [NOCPP[
										ErrWarnLtSlashInRcdata();
										// ]NOCPP]
										TokenHandler.Characters(LT_SOLIDUS, 0, 2);
										EmitStrBuf();
										if (c == '\u0000')
										{
											EmitReplacementCharacter(buf, pos);
										}
										else
										{
											cstart = pos; // don't drop the
											// character
										}
										//state = Transition(state, returnState, reconsume, pos);
								        state = returnState;
										goto continueStateloop;
								}
							}
						}
					// XXX reorder point
					// BEGIN HOTSPOT WORKAROUND
					case TokenizerState.BOGUS_COMMENT:
						/*boguscommentloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume every character up to and including the first
							 * U+003E GREATER-THAN SIGN character (>) or the end of
							 * the file (EOF), whichever comes first. Emit a comment
							 * token whose data is the concatenation of all the
							 * characters starting from and including the character
							 * that caused the state machine to switch into the
							 * bogus comment state, up to and including the
							 * character immediately before the last consumed
							 * character (i.e. up to the character just before the
							 * U+003E or EOF character). (If the comment was started
							 * by the end of the file (EOF), the token is empty.)
							 * 
							 * Switch to the data state.
							 * 
							 * If the end of the file was reached, reconsume the EOF
							 * character.
							 */
							switch (c)
							{
								case '>':
									EmitComment(0, pos);
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT_HYPHEN;
									goto breakBoguscommentloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									AppendLongStrBuf(c);
									continue;
							}
						}
					breakBoguscommentloop:
						goto case TokenizerState.BOGUS_COMMENT_HYPHEN;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BOGUS_COMMENT_HYPHEN:
						/*boguscommenthyphenloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							switch (c)
							{
								case '>':
									// [NOCPP[
									MaybeAppendSpaceToBogusComment();
									// ]NOCPP]
									EmitComment(0, pos);
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									AppendSecondHyphenToBogusComment();
									goto continueBoguscommenthyphenloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
							}
						continueBoguscommenthyphenloop:
							continue;
						}

					// XXX reorder point
					case TokenizerState.SCRIPT_DATA:
						/*scriptdataloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data less-than sign state.
									 */
									FlushChars(buf, pos);
									returnState = state;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_LESS_THAN_SIGN;
									goto breakScriptdataloop; // FALL THRU continue
								// stateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * script data state.
									 */
									continue;
							}
						}
					breakScriptdataloop:
						goto case TokenizerState.SCRIPT_DATA_LESS_THAN_SIGN;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_LESS_THAN_SIGN:
						/*scriptdatalessthansignloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Set the temporary buffer
									 * to the empty string. Switch to the script
									 * data end tag open state.
									 */
									index = 0;
									ClearStrBuf();
									//state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
							        state = TokenizerState.NON_DATA_END_TAG_NAME;
									goto continueStateloop;
								case '!':
									TokenHandler.Characters(LT_GT, 0, 1);
									cstart = pos;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPE_START;

									goto breakScriptdatalessthansignloop; // FALL THRU
								// continue
								// stateloop;
								default:
									/*
									 * Otherwise, emit a U+003C LESS-THAN SIGN
									 * character token
									 */
									TokenHandler.Characters(LT_GT, 0, 1);
									/*
									 * and reconsume the current input character in
									 * the data state.
									 */
									cstart = pos;
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakScriptdatalessthansignloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPE_START;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPE_START:
						/*scriptdataescapestartloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escape start dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPE_START_DASH;
									goto breakScriptdataescapestartloop; // FALL THRU
								// continue
								// stateloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakScriptdataescapestartloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPE_START_DASH;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPE_START_DASH:
						/*scriptdataescapestartdashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escaped dash dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH;
									goto breakScriptdataescapestartdashloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakScriptdataescapestartdashloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH:
						/*scriptdataescapeddashdashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Stay in the
									 * script data escaped dash dash state.
									 */
									continue;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data escaped less-than sign state.
									 */
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
									 * GREATER-THAN SIGN character token. Switch to
									 * the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakScriptdataescapeddashdashloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakScriptdataescapeddashdashloop;
								// goto continueStateloop;
							}
						}
					breakScriptdataescapeddashdashloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED:
						/*scriptdataescapedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escaped dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_DASH;
									goto breakScriptdataescapedloop; // FALL THRU
								// continue
								// stateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data escaped less-than sign state.
									 */
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * script data escaped state.
									 */
									continue;
							}
						}
					breakScriptdataescapedloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED_DASH;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED_DASH:
						/*scriptdataescapeddashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escaped dash dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH;
									goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data escaped less-than sign state.
									 */
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
									goto breakScriptdataescapeddashloop;
								// goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdataescapeddashloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
						/*scriptdataescapedlessthanloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Set the temporary buffer
									 * to the empty string. Switch to the script
									 * data escaped end tag open state.
									 */
									index = 0;
									ClearStrBuf();
									returnState = TokenizerState.SCRIPT_DATA_ESCAPED;
                                    
									//state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
							        state = TokenizerState.NON_DATA_END_TAG_NAME;
									goto continueStateloop;
								case 'S':
								case 's':
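									/*
									 * U+0041 LATIN CAPITAL LETTER A through to
									 * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
									 * LESS-THAN SIGN character token and the
									 * current input character as a character token.
									 * (Only 'S'/'s' is matched by this case; index is
									 * set to 1 so that the double escape start state
									 * compares the following characters against
									 * SCRIPT_ARR.)
									 */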
									TokenHandler.Characters(LT_GT, 0, 1);
									cstart = pos;
									index = 1;
									/*
									 * Set the temporary buffer to the empty string.
									 * Append the lowercase version of the current
									 * input character (add 0x0020 to the
									 * character's code point) to the temporary
									 * buffer. Switch to the script data double
									 * escape start state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_START;
									goto breakScriptdataescapedlessthanloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Emit a U+003C LESS-THAN SIGN
									 * character token and reconsume the current
									 * input character in the script data escaped
									 * state.
									 */
									TokenHandler.Characters(LT_GT, 0, 1);
									cstart = pos;
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdataescapedlessthanloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_START;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_START:
						/*scriptdatadoubleescapestartloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							Debug.Assert(index > 0);
							if (index < 6)
							{ // SCRIPT_ARR.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != Tokenizer.SCRIPT_ARR[index])
								{
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
								    state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							switch (c)
							{
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
								case '/':
								case '>':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
									 * (>) Emit the current input character as a
									 * character token. If the temporary buffer is
									 * the string "script", then switch to the
									 * script data double escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakScriptdatadoubleescapestartloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data escaped state.
									 */
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapestartloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED:
						/*scriptdatadoubleescapedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data double escaped dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
									goto breakScriptdatadoubleescapedloop; // FALL THRU
								// continue
								// stateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Emit a U+003C
									 * LESS-THAN SIGN character token. Switch to the
									 * script data double escaped less-than sign
									 * state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * script data double escaped state.
									 */
									continue;
							}
						}
					breakScriptdatadoubleescapedloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
						/*scriptdatadoubleescapeddashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data double escaped dash dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
									goto breakScriptdatadoubleescapeddashloop;
								// goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Emit a U+003C
									 * LESS-THAN SIGN character token. Switch to the
									 * script data double escaped less-than sign
									 * state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data double escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapeddashloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
						/*scriptdatadoubleescapeddashdashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Stay in the
									 * script data double escaped dash dash state.
									 */
									continue;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Emit a U+003C
									 * LESS-THAN SIGN character token. Switch to the
									 * script data double escaped less-than sign
									 * state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
									goto breakScriptdatadoubleescapeddashdashloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
									 * GREATER-THAN SIGN character token. Switch to
									 * the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data double escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapeddashdashloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
						/*scriptdatadoubleescapedlessthanloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
									 * character token. Set the temporary buffer to
									 * the empty string. Switch to the script data
									 * double escape end state.
									 */
									index = 0;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_END;
									goto breakScriptdatadoubleescapedlessthanloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data double escaped
									 * state.
									 */
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapedlessthanloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_END;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_END:
						/*scriptdatadoubleescapeendloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (index < 6)
							{ // SCRIPT_ARR.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != Tokenizer.SCRIPT_ARR[index])
								{
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
								    state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							switch (c)
							{
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
								case '/':
								case '>':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
									 * (>) Emit the current input character as a
									 * character token. If the temporary buffer is
									 * the string "script", then switch to the
									 * script data escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
								default:
									/*
									 * Reconsume the current input character in the
									 * script data double escaped state.
									 */
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}

					// XXX reorder point
					case TokenizerState.MARKUP_DECLARATION_OCTYPE:
						/*markupdeclarationdoctypeloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (index < 6)
							{ // OCTYPE.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded == Tokenizer.OCTYPE[index])
								{
									AppendLongStrBuf(c);
								}
								else
								{
									ErrBogusComment();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
								    state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							else
							{
							    // state = Transition(state, Tokenizer.DOCTYPE, reconsume, pos);
							    state = TokenizerState.DOCTYPE;
								reconsume = true;
								goto breakMarkupdeclarationdoctypeloop;
								// goto continueStateloop;
							}
						}
					breakMarkupdeclarationdoctypeloop:
						goto case TokenizerState.DOCTYPE;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE:
						/*doctypeloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							InitDoctypeFields();
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before DOCTYPE name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_NAME;
									goto breakDoctypeloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Parse error.
									 */
									ErrMissingSpaceBeforeDoctypeName();
									/*
									 * Reconsume the current character in the before
									 * DOCTYPE name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_NAME;
									reconsume = true;
									goto breakDoctypeloop;
								// goto continueStateloop;
							}
						}
					breakDoctypeloop:
						goto case TokenizerState.BEFORE_DOCTYPE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_DOCTYPE_NAME:
						/*beforedoctypenameloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before DOCTYPE name state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrNamelessDoctype();
									/*
									 * Create a new DOCTYPE token. Set its
									 * force-quirks flag to on.
									 */
									forceQuirks = true;
									/*
									 * Emit the token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Create a
										 * new DOCTYPE token. Set the token's name
										 * to the lowercase version of the input
										 * character (add 0x0020 to the character's
										 * code point).
										 */
										c += (char)0x20;
									}
									/* Anything else Create a new DOCTYPE token. */
									/*
									 * Set the token's name to the current
									 * input character.
									 */
									ClearStrBufAndAppend(c);
									/*
									 * Switch to the DOCTYPE name state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.DOCTYPE_NAME;
									goto breakBeforedoctypenameloop;
								// goto continueStateloop;
							}
						}
					breakBeforedoctypenameloop:
						goto case TokenizerState.DOCTYPE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_NAME:
						/*doctypenameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									StrBufToDoctypeName();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the after DOCTYPE name state.
									 */
									StrBufToDoctypeName();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_NAME;
									goto breakDoctypenameloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									StrBufToDoctypeName();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * U+0041 LATIN CAPITAL LETTER A through to
									 * U+005A LATIN CAPITAL LETTER Z Append the
									 * lowercase version of the input character (add
									 * 0x0020 to the character's code point) to the
									 * current DOCTYPE token's name.
									 */
									if (c >= 'A' && c <= 'Z')
									{
										c += (char)0x0020;
									}
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * name.
									 */
									AppendStrBuf(c);
									/*
									 * Stay in the DOCTYPE name state.
									 */
									continue;
							}
						}
					breakDoctypenameloop:
						goto case TokenizerState.AFTER_DOCTYPE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_NAME:
						/*afterdoctypenameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the after DOCTYPE name state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case 'p':
								case 'P':
									index = 0;
									//state = Transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
							        state = TokenizerState.DOCTYPE_UBLIC;

									goto breakAfterdoctypenameloop;
								// goto continueStateloop;
								case 's':
								case 'S':
									index = 0;
									//state = Transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
							        state = TokenizerState.DOCTYPE_YSTEM;
									goto continueStateloop;
								default:
									/*
									 * Otherwise, this is a parse error.
									 */
									BogusDoctype();

									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;

									goto continueStateloop;
							}
						}
					breakAfterdoctypenameloop:
						goto case TokenizerState.DOCTYPE_UBLIC;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_UBLIC:
						/*doctypeublicloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * If the six characters starting from the current input
							 * character are an ASCII case-insensitive match for the
							 * word "PUBLIC", then consume those characters and
							 * switch to the before DOCTYPE public identifier state.
							 */
							if (index < 5)
							{ // UBLIC.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != Tokenizer.UBLIC[index])
								{
									BogusDoctype();
									// forceQuirks = true;
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
								    state = TokenizerState.BOGUS_DOCTYPE;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							else
							{
								//state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
							    state = TokenizerState.AFTER_DOCTYPE_PUBLIC_KEYWORD;
								reconsume = true;
								goto breakDoctypeublicloop;
								// goto continueStateloop;
							}
						}
					breakDoctypeublicloop:
						goto case TokenizerState.AFTER_DOCTYPE_PUBLIC_KEYWORD;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_PUBLIC_KEYWORD:
						/*afterdoctypepublickeywordloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before DOCTYPE public
									 * identifier state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
									goto breakAfterdoctypepublickeywordloop;
								// FALL THROUGH continue stateloop
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Parse Error.
									 */
									ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's public identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Parse Error.
									 */
									ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's public identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
									/* U+003E GREATER-THAN SIGN (>) Parse error. */
									ErrExpectedPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakAfterdoctypepublickeywordloop:
						goto case TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
						/*beforedoctypepublicidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before DOCTYPE public identifier
									 * state.
									 */
									continue;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Set the DOCTYPE
									 * token's public identifier to the empty string
									 * (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
									goto breakBeforedoctypepublicidentifierloop;
								// goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
									 * public identifier to the empty string (not
									 * missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
									/* U+003E GREATER-THAN SIGN (>) Parse error. */
									ErrExpectedPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakBeforedoctypepublicidentifierloop:
						goto case TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
						/*doctypepublicidentifierdoublequotedloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the after
									 * DOCTYPE public identifier state.
									 */
									publicIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
									goto breakDoctypepublicidentifierdoublequotedloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrGtInPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									publicIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * public identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE public identifier
									 * (double-quoted) state.
									 */
									continue;
							}
						}
					breakDoctypepublicidentifierdoublequotedloop:
						goto case TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
						/*afterdoctypepublicidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
							        state = TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the between DOCTYPE public and
									 * system identifiers state.
									 */
									//state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
							        state = TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;

									goto breakAfterdoctypepublicidentifierloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Parse error.
									 */
									ErrNoSpaceBetweenPublicAndSystemIds();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Parse error.
									 */
									ErrNoSpaceBetweenPublicAndSystemIds();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakAfterdoctypepublicidentifierloop:
						goto case TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
						/*betweendoctypepublicandsystemidentifiersloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the between DOCTYPE public and system
									 * identifiers state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Set the DOCTYPE
									 * token's system identifier to the empty string
									 * (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto breakBetweendoctypepublicandsystemidentifiersloop;
								// goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
									 * system identifier to the empty string (not
									 * missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakBetweendoctypepublicandsystemidentifiersloop:
						goto case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
						/*doctypesystemidentifierdoublequotedloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the after
									 * DOCTYPE system identifier state.
									 */
									systemIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrGtInSystemId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									systemIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * system identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									continue;
							}
						}
                    // next 2 lines were unreachable; commented out
					//breakDoctypesystemidentifierdoublequotedloop:
					//	goto case TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
						/*afterdoctypesystemidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the after DOCTYPE system identifier state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									/*
									 * Switch to the bogus DOCTYPE state. (This does
									 * not set the DOCTYPE token's force-quirks flag
									 * to on.)
									 */
									BogusDoctypeWithoutQuirks();
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto breakAfterdoctypesystemidentifierloop;
								// goto continueStateloop;
							}
						}
					breakAfterdoctypesystemidentifierloop:
						goto case TokenizerState.BOGUS_DOCTYPE;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BOGUS_DOCTYPE:
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit that
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Stay in the bogus DOCTYPE
									 * state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.DOCTYPE_YSTEM:
						/*doctypeystemloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Otherwise, if the six characters starting from the
							 * current input character are an ASCII case-insensitive
							 * match for the word "SYSTEM", then consume those
							 * characters and switch to the before DOCTYPE system
							 * identifier state.
							 */
							if (index < 5)
							{ // YSTEM.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != YSTEM[index])
								{
									BogusDoctype();
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
								    state = TokenizerState.BOGUS_DOCTYPE;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								goto continueStateloop;
							}
							else
							{
								//state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
							    state = TokenizerState.AFTER_DOCTYPE_SYSTEM_KEYWORD;
								reconsume = true;
								goto breakDoctypeystemloop;
								// goto continueStateloop;
							}
						}
					breakDoctypeystemloop:
						goto case TokenizerState.AFTER_DOCTYPE_SYSTEM_KEYWORD;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_SYSTEM_KEYWORD:
						/*afterdoctypesystemkeywordloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;

									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before DOCTYPE system
									 * identifier state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
									goto breakAfterdoctypesystemkeywordloop;
								// FALL THROUGH continue stateloop
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Parse Error.
									 */
									ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Parse Error.
									 */
									ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
									/* U+003E GREATER-THAN SIGN (>) Parse error. */
									ErrExpectedPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakAfterdoctypesystemkeywordloop:
						goto case TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
						/*beforedoctypesystemidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before DOCTYPE system identifier
									 * state.
									 */
									continue;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Set the DOCTYPE
									 * token's system identifier to the empty string
									 * (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
									 * system identifier to the empty string (not
									 * missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto breakBeforedoctypesystemidentifierloop;
								// goto continueStateloop;
								case '>':
									/* U+003E GREATER-THAN SIGN (>) Parse error. */
									ErrExpectedSystemId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakBeforedoctypesystemidentifierloop:
						goto case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the after
									 * DOCTYPE system identifier state.
									 */
									systemIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
									goto continueStateloop;
								case '>':
									ErrGtInSystemId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									systemIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * system identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									continue;
							}
						}
					// XXX reorder point

					case TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the after
									 * DOCTYPE public identifier state.
									 */
									publicIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
									goto continueStateloop;
								case '>':
									ErrGtInPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									publicIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * public identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE public identifier
									 * (single-quoted) state.
									 */
									continue;
							}
						}
                        // XXX reorder point
                         case TokenizerState.PROCESSING_INSTRUCTION:
                            //processinginstructionloop: 
                            for (;;) {
                               if (++pos == endPos) {
                                   break;
                               }

                               c = buf[pos];
                               switch (c) {
                                   case '?':
                                       //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,reconsume, pos);
                                       state = TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK;
                                   
                                       break;
                                // continue stateloop;
                                default:
                                    continue;
                            }
                        }
                    //breakProcessingInstructionLoop:
                        break;


                case TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK:
                    if (++pos == endPos) {
                         goto breakStateloop;
                   }
                   c = buf[pos];
                   switch (c) {
                      case '>':
                           //state = Transition(state, Tokenizer.DATA,reconsume, pos);
                           state = TokenizerState.DATA;
                           continue;
                        default:
                           //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION,reconsume, pos);
                           state = TokenizerState.PROCESSING_INSTRUCTION;
                           continue;
                  }
					// END HOTSPOT WORKAROUND
				}
			} // stateloop


			breakStateloop:

			FlushChars(buf, pos);
			/*
			 * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
			 */
			// Save locals
			stateSave = state;
			returnStateSave = returnState;
			return pos;
		}
Example #35
0
        /// <summary>
        /// Builds a text node for the markup between <c>HtmlStart</c> and <c>Pos</c> and either
        /// attaches it to the parent element or hands it back to the caller. When
        /// <c>WrapLiterals</c> is set, the text node is first placed inside a new "span" element.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory supplying the source HTML (or the bound document) the text is read from.
        /// </param>
        /// <param name="literal">
        /// [out] The node that was created, or null when <c>Pos</c> is not past <c>HtmlStart</c>.
        /// </param>
        ///
        /// <returns>
        /// true only when there is no parent: the node is returned through
        /// <paramref name="literal"/> and the tokenizer state is set to Finished. false when there
        /// was no text, or when the node was appended to the parent element and this iterator
        /// was reset.
        /// </returns>

        public bool TryGetLiteral(HtmlElementFactory factory, out IDomObject literal)
        {
            // Nothing lies between the end of the last tag and the current position.
            if (Pos <= HtmlStart)
            {
                literal = null;
                return false;
            }

            // Pick the concrete text node type from the current insertion mode.
            InsertionMode mode = InsertionMode;
            DomText textNode;
            if (mode == InsertionMode.Invalid)
            {
                textNode = new DomInvalidElement();
            }
            else if (mode == InsertionMode.Text)
            {
                // Text mode applies to a single literal only; fall back to the default afterwards.
                InsertionMode = InsertionMode.Default;
                textNode = new DomInnerText();
            }
            else
            {
                textNode = new DomText();
            }
            literal = textNode;

            if (!factory.IsBound)
            {
                // Unbound: copy the decoded text straight into the node.
                string rawText = factory.Html.SubstringBetween(HtmlStart, Pos);
                literal.NodeValue = HtmlData.HtmlDecode(rawText);
            }
            else
            {
                // Bound: the node refers to a range registered with the document index.
                textNode.SetTextIndex(factory.Document, factory.Document.DocumentIndex.TokenizeString(HtmlStart, Pos - HtmlStart));
            }

            if (WrapLiterals)
            {
                DomElement span = DomElement.Create("span");
                span.ChildNodesInternal.AddAlways(literal);
                literal = span;
            }

            if (Parent == null)
            {
                // Top level: the caller receives the node and iteration is over.
                TokenizerState = TokenizerState.Finished;
                return true;
            }

            // Nested: attach to the parent and get ready to look for the next sibling.
            ((DomElement)Parent.Element).ChildNodesInternal.AddAlways(literal);
            Reset();
            return false;
        }
Example #36
0
 /// <summary>
 /// Routes the first two characters of <paramref name="val"/> either to the token
 /// listener or into the long string buffer, depending on whether
 /// <paramref name="returnState"/> has any bit of <c>DATA_AND_RCDATA_MASK</c> set.
 /// </summary>
 /// <param name="val">Buffer holding at least two characters.</param>
 /// <param name="returnState">The state the tokenizer will return to; only its low byte is tested.</param>
 void EmitOrAppendTwo(char[] val, TokenizerState returnState)
 {
     // NOTE(review): the original carries a TODO asking whether this test should be
     // "!= 0" instead of "== 0"; confirm against the definition of DATA_AND_RCDATA_MASK.
     bool anyMaskBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;
     if (anyMaskBitSet)
     {
         // Emit both characters directly as character data.
         TokenListener.Characters(val, 0, 2);
         return;
     }

     // No mask bit set: accumulate both characters in the long string buffer.
     AppendLongStrBuf(val[0]);
     AppendLongStrBuf(val[1]);
 }
Example #37
0
		/// <summary>
		/// Hands the tag currently being built to the token handler as either a start tag
		/// or an end tag, then clears the per-tag state (tag name and attributes).
		/// </summary>
		/// <param name="selfClosing">Passed to <c>MaybeErrSlashInEndTag</c> and, for start tags, on to the handler.</param>
		/// <param name="pos">Current position; <c>cstart</c> is set to <c>pos + 1</c>.</param>
		/// <returns>
		/// The value of <c>stateSave</c> once the handler has run. It is set to DATA before the
		/// handler is called, so any other value was written during the handler call.
		/// </returns>
		private TokenizerState EmitCurrentTagToken(bool selfClosing, int pos)
		{
			cstart = pos + 1;
			MaybeErrSlashInEndTag(selfClosing);
			stateSave = TokenizerState.DATA;

			// Never pass a null attribute set to the handler.
			HtmlAttributes tagAttributes = attributes ?? HtmlAttributes.EMPTY_ATTRIBUTES;

			if (!endTag)
			{
				TokenHandler.StartTag(tagName, tagAttributes, selfClosing);
			}
			else
			{
				/*
				 * When an end tag token is emitted, the content model flag must be
				 * switched to the PCDATA state.
				 */
				MaybeErrAttributesOnEndTag(tagAttributes);
				TokenHandler.EndTag(tagName);
			}

			tagName = null;
			ResetAttributes();

			/*
			 * The token handler may have called SetStateAndEndTagExpectation
			 * and changed stateSave since the start of this method.
			 */
			return stateSave;
		}
Example #38
0
 /// <summary>
 /// Creates an <see cref="UnclosedBlockCommentException"/>, passing both arguments
 /// through unchanged to the base class constructor.
 /// </summary>
 /// <param name="tokenizerState">The tokenizer state forwarded to the base constructor.</param>
 /// <param name="batchSource">The batch source forwarded to the base constructor.</param>
 internal UnclosedBlockCommentException(TokenizerState tokenizerState, IBatchSource batchSource)
     : base(tokenizerState, batchSource) { }
Example #39
0
		// ]NOCPP]

		// For the token handler to call
		/**
		 * Stores the given tokenizer state and, unless that state is DATA, the
		 * element name the tokenizer should expect as the matching end tag.
		 * This should only ever be used to put the tokenizer into one of the
		 * states that have a special end tag expectation.
		 *
		 * @param specialTokenizerState
		 *            the tokenizer state to set
		 * @param endTagExpectation
		 *            the expected end tag for transitioning back to normal;
		 *            not read when the state is DATA
		 */
		public void SetStateAndEndTagExpectation(TokenizerState specialTokenizerState,
				[Local] String endTagExpectation)
		{
			stateSave = specialTokenizerState;

			// DATA has no end tag expectation, so only the state is recorded.
			if (specialTokenizerState != TokenizerState.DATA)
			{
				char[] nameChars = endTagExpectation.ToCharArray();
				this.endTagExpectation = ElementName.ElementNameByBuffer(nameChars, 0, nameChars.Length);
				EndTagExpectationToArray();
			}
		}
Example #40
0
            /// <summary>
            /// Splits <c>original</c> into tokens: whitespace-separated words, single-quoted
            /// literals, parenthesised or bracketed groups (kept whole, nesting included) and
            /// stand-alone commas.
            /// </summary>
            /// <remarks>
            /// The start of every token is taken from the loop index at the moment the token
            /// begins. Leading whitespace, runs of whitespace and commas with or without
            /// surrounding spaces therefore cannot shift the substring that is reported, and a
            /// comma directly followed by another token can no longer push the start offset past
            /// the end of the string. A token that ends with a quoted literal includes the closing
            /// quote, in the same way a closing parenthesis or bracket is included.
            /// </remarks>
            /// <returns>An enumerator over the tokens in source order.</returns>
            /// <exception cref="InvalidExpressionException">
            /// The state variable holds a value this tokenizer does not handle.
            /// </exception>
            IEnumerator<SqlString> IEnumerable<SqlString>.GetEnumerator()
            {
                TokenizerState state = TokenizerState.WhiteSpace;
                int parenthesisCount = 0;
                bool escapeQuote = false;
                int tokenStart = 0;
                int tokenLength = 0;
                string originalString = original.ToString();

                for (int i = 0; i < originalString.Length; i++)
                {
                    char ch = originalString[i];
                    switch (state)
                    {
                        case TokenizerState.WhiteSpace:
                            if (ch == ',')
                            {
                                // A comma between tokens is a token of its own.
                                yield return new SqlString(",");
                            }
                            else if (!char.IsWhiteSpace(ch))
                            {
                                // A new token begins exactly here, whatever preceded it.
                                tokenStart = i;
                                tokenLength = 1;
                                if (ch == '\'')
                                {
                                    state = TokenizerState.Quoted;
                                }
                                else if (ch == '(' || ch == '[')
                                {
                                    state = TokenizerState.InParenthesis;
                                    parenthesisCount = 1;
                                }
                                else
                                {
                                    state = TokenizerState.Token;
                                }
                            }
                            break;

                        case TokenizerState.Quoted:
                            // Every character seen while quoted belongs to the token.
                            tokenLength += 1;
                            if (escapeQuote)
                            {
                                // This character was escaped by the previous one.
                                escapeQuote = false;
                            }
                            // handle escaping of ' by using '' or \'
                            else if (ch == '\\' || (ch == '\'' && i + 1 < originalString.Length && originalString[i + 1] == '\''))
                            {
                                escapeQuote = true;
                            }
                            else if (ch == '\'')
                            {
                                // Closing quote: it has already been counted above.
                                yield return original.Substring(tokenStart, tokenLength);

                                tokenLength = 0;
                                state = TokenizerState.WhiteSpace;
                            }
                            break;

                        case TokenizerState.InParenthesis:
                            tokenLength += 1;
                            if (ch == '(' || ch == '[')
                            {
                                parenthesisCount += 1;
                            }
                            else if (ch == ')' || ch == ']')
                            {
                                parenthesisCount -= 1;
                                if (parenthesisCount == 0)
                                {
                                    // Outermost group closed: emit it including the closer.
                                    yield return original.Substring(tokenStart, tokenLength);

                                    tokenLength = 0;
                                    state = TokenizerState.WhiteSpace;
                                }
                            }
                            break;

                        case TokenizerState.Token:
                            if (char.IsWhiteSpace(ch))
                            {
                                yield return original.Substring(tokenStart, tokenLength);

                                tokenLength = 0;
                                state = TokenizerState.WhiteSpace;
                            }
                            else if (ch == ',')
                            {
                                // stop current token, and send the , as well
                                yield return original.Substring(tokenStart, tokenLength);

                                yield return new SqlString(",");

                                tokenLength = 0;
                                state = TokenizerState.WhiteSpace;
                            }
                            else
                            {
                                // The character extends the current token; an opening quote or
                                // parenthesis additionally switches the scanning mode.
                                tokenLength += 1;
                                if (ch == '(' || ch == '[')
                                {
                                    state = TokenizerState.InParenthesis;
                                    parenthesisCount = 1;
                                }
                                else if (ch == '\'')
                                {
                                    state = TokenizerState.Quoted;
                                }
                            }
                            break;

                        default:
                            throw new InvalidExpressionException("Could not understand the string " + original);
                    }
                }

                // Emit whatever is left, including an unterminated quote or group.
                if (tokenLength > 0)
                {
                    yield return original.Substring(tokenStart, tokenLength);
                }
            }
Example #41
0
		/**
		 * Stores the given tokenizer state together with the element name the
		 * tokenizer should expect as the matching end tag, then refreshes the
		 * array form of that expectation. This should only ever be used to put
		 * the tokenizer into one of the states that have a special end tag
		 * expectation.
		 *
		 * @param specialTokenizerState
		 *            the tokenizer state to set
		 * @param endTagExpectation
		 *            the expected end tag for transitioning back to normal
		 */
		public void SetStateAndEndTagExpectation(TokenizerState specialTokenizerState,
				ElementName endTagExpectation)
		{
			stateSave = specialTokenizerState;
			// "this." is required: the parameter hides the field of the same name.
			this.endTagExpectation = endTagExpectation;
			EndTagExpectationToArray();
		}
        void StateLoop3_RawText_CData_RcRef(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were and LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {
                 
                //*************
            continueStateloop:
                //*************

                switch (state)
                {   
                    // XXX reorder point
                    case TokenizerState.CDATA_START:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                if (index < 6)
                                { // CDATA_LSQB.Length
                                    if (c == Tokenizer.CDATA_LSQB[index])
                                    {
                                        AppendLongStrBuf(c);
                                    }
                                    else
                                    {
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    reader.StartCollect(); // start coalescing
                                    //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                    state = TokenizerState.s68_CDATA_SECTION;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto case TokenizerState.s68_CDATA_SECTION;
                                    //break; // FALL THROUGH goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                            //------------------------------------

                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s68_CDATA_SECTION:
                        /*cdatasectionloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB;
                                        goto breakCdatasectionloop; // FALL THROUGH
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        continue;
                                }
                            }
                            goto breakStateloop;
                        //------------------------------------
                        breakCdatasectionloop:
                            goto case TokenizerState.CDATA_RSQB;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.CDATA_RSQB:
                        /*cdatarsqb:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        //state = Transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB_RSQB;
                                        goto breakCdatarsqb;
                                    default:
                                        TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 1);
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                        state = TokenizerState.s68_CDATA_SECTION;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------ 
                        breakCdatarsqb:
                            goto case TokenizerState.CDATA_RSQB_RSQB;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.CDATA_RSQB_RSQB:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;
                            }
                            switch (c)
                            {
                                case '>':
                                    //cstart = pos + 1;
                                    reader.SkipOneAndStartCollect();
                                    //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                default:
                                    TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 2);
                                    reader.StartCollect();
                                    //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                    state = TokenizerState.s68_CDATA_SECTION;
                                    reader.StepBack();
                                    //reconsume = true;
                                    goto continueStateloop;

                            }
                        } 
                    // XXX reorder point
                    case TokenizerState.s07_PLAINTEXT:
                        /*plaintextloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\u0000':
                                        EmitPlaintextReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * RAWTEXT state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        } 
                    // XXX reorder point
                    case TokenizerState.s03_RCDATA:
                        /*rcdataloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in RCDATA state.
                                         */
                                        //FlushChars(buf, pos);
                                        FlushChars();
                                        ClearStrBufAndAppend(c);
                                        additional = '\u0000';
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
                                        goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * RCDATA less-than sign state.
                                         */
                                        //FlushChars(buf, pos);
                                        FlushChars();
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Emit the current input character as a
                                         * character token. Stay in the RCDATA state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s05_RAWTEXT:
                        /*rawtextloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * RAWTEXT less-than sign state.
                                         */
                                        FlushChars();

                                        returnState = state;
                                        //state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN;
                                        goto breakRawtextloop;
                                    // FALL THRU goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Emit the current input character as a
                                         * character token. Stay in the RAWTEXT state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakRawtextloop:
                            goto case TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN;
                        }
                    // XXX fallthru don't reorder
                    case TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN:
                        /*rawtextrcdatalessthansignloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Set the temporary buffer
                                         * to the empty string. Switch to the script
                                         * data end tag open state.
                                         */
                                        index = 0;
                                        ClearStrBuf();
                                        //state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
                                        state = TokenizerState.NON_DATA_END_TAG_NAME;
                                        goto breakRawtextrcdatalessthansignloop;
                                    // FALL THRU goto continueStateloop;
                                    default:
                                        /*
                                         * Otherwise, emit a U+003C LESS-THAN SIGN
                                         * character token
                                         */
                                        TokenListener.Characters(Tokenizer.LT_GT, 0, 1);
                                        /*
                                         * and reconsume the current input character in
                                         * the data state.
                                         */
                                        reader.StartCollect();
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakRawtextrcdatalessthansignloop:
                            goto case TokenizerState.NON_DATA_END_TAG_NAME;
                        }
                    // XXX fall thru. don't reorder.
                    case TokenizerState.NON_DATA_END_TAG_NAME:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * ASSERT! when entering this state, set index to 0 and
                                 * call clearStrBuf() assert (contentModelElement !=
                                 * null); Let's implement the above without lookahead.
                                 * strBuf is the 'temporary buffer'.
                                 */
                                if (index < endTagExpectationAsArray.Length)
                                {
                                    char e = endTagExpectationAsArray[index];
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded != e)
                                    {

                                        ErrHtml4LtSlashInRcdata(folded);
                                        TokenListener.Characters(Tokenizer.LT_SOLIDUS,
                                                0, 2);
                                        EmitStrBuf();
                                        reader.StartCollect();
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    AppendStrBuf(c);
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    endTag = true;
                                    // XXX replace contentModelElement with different
                                    // type
                                    tagName = endTagExpectation;
                                    switch (c)
                                    {
                                        case '\r':
                                            SilentCarriageReturn();
                                            //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                            state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;

                                            goto breakStateloop;
                                        case '\n':
                                        case ' ':
                                        case '\t':
                                        case '\u000C':
                                            /*
                                             * U+0009 CHARACTER TABULATION U+000A LINE
                                             * FEED (LF) U+000C FORM FEED (FF) U+0020
                                             * SPACE If the current end tag token is an
                                             * appropriate end tag token, then switch to
                                             * the before attribute name state.
                                             */
                                            //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                            state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                            goto continueStateloop;
                                        case '/':
                                            /*
                                             * U+002F SOLIDUS (/) If the current end tag
                                             * token is an appropriate end tag token,
                                             * then switch to the self-closing start tag
                                             * state.
                                             */
                                            //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                            state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                            goto continueStateloop;
                                        case '>':
                                            /*
                                             * U+003E GREATER-THAN SIGN (>) If the
                                             * current end tag token is an appropriate
                                             * end tag token, then emit the current tag
                                             * token and switch to the data state.
                                             */
                                            //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                            state = EmitCurrentTagToken(false);
                                            if (shouldSuspend)
                                            {
                                                goto breakStateloop;
                                            }
                                            goto continueStateloop;
                                        default:
                                            /*
                                             * Emit a U+003C LESS-THAN SIGN character
                                             * token, a U+002F SOLIDUS character token,
                                             * a character token for each of the
                                             * characters in the temporary buffer (in
                                             * the order they were added to the buffer),
                                             * and reconsume the current input character
                                             * in the RAWTEXT state.
                                             */
                                            // [NOCPP[
                                            ErrWarnLtSlashInRcdata();
                                            // ]NOCPP]
                                            TokenListener.Characters(LT_SOLIDUS, 0, 2);
                                            EmitStrBuf();
                                            if (c == '\u0000')
                                            {
                                                EmitReplacementCharacter();
                                            }
                                            else
                                            {
                                                reader.StartCollect(); // don't drop the
                                                // character
                                            }
                                            //state = Transition(state, returnState, reconsume, pos);
                                            state = returnState;
                                            goto continueStateloop;
                                    }
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        } 
                    case TokenizerState.PROCESSING_INSTRUCTION:
                        //processinginstructionloop: 
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '?':
                                        //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,reconsume, pos);
                                        state = TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK;

                                        break;
                                    // continue stateloop;
                                    default:
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    //breakProcessingInstructionLoop: 
                    case TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;

                            }

                            switch (c)
                            {
                                case '>':
                                    //state = Transition(state, Tokenizer.DATA,reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    continue;
                                default:
                                    //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION,reconsume, pos);
                                    state = TokenizerState.PROCESSING_INSTRUCTION;
                                    continue;
                            }

                        }
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

       breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }
Example #43
0
		/// <summary>
		/// Resets the tokenizer's saved state to <c>TokenizerState.DATA</c> and clears the
		/// scratch fields, the doctype fields and the references to the current tag name,
		/// attribute name and (when <c>newAttributesEachTime</c> is set) attributes.
		/// </summary>
		/// <remarks>
		/// <c>shouldSuspend</c> is intentionally left untouched here so that the parser can
		/// still choose to abort (reset removed by J. Treworgy, 12/7/2012).
		/// </remarks>
		public void ResetToDataState()
		{
			// Saved state goes back to DATA.
			stateSave = TokenizerState.DATA;
			// line = 1; XXX line numbers

			// Lengths, indexes and accumulators that restart at zero.
			strBufLen = 0;
			longStrBufLen = 0;
			strBufMark = 0;
			index = 0;
			value = 0;
			lo = 0;
			hi = 0; // will always be overwritten before use anyway

			// Fields that restart at -1.
			entCol = -1;
			firstCharKey = -1;
			candidate = -1;
			prevValue = -1;

			// Flags that restart cleared.
			lastCR = false;
			forceQuirks = false;
			seenDigits = false;
			endTag = false;

			additional = '\u0000';

			// NOTE: shouldSuspend is deliberately NOT reset (see remarks above).
			InitDoctypeFields();

			// Drop the references held for the tag that was being tokenized.
			tagName = null;
			attributeName = null;
			if (newAttributesEachTime)
			{
				attributes = null;
			}
		}
        void StateLoop3_DocType(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were an LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {
                  
                    // XXX reorder point
                    case TokenizerState.MARKUP_DECLARATION_OCTYPE:
                        /*markupdeclarationdoctypeloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                if (index < 6)
                                { // OCTYPE.Length
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded == Tokenizer.OCTYPE[index])
                                    {
                                        AppendLongStrBuf(c);
                                    }
                                    else
                                    {
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    // state = Transition(state, Tokenizer.DOCTYPE, reconsume, pos);
                                    state = TokenizerState.s52_DOCTYPE;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto breakMarkupdeclarationdoctypeloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakMarkupdeclarationdoctypeloop:
                            goto case TokenizerState.s52_DOCTYPE;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s52_DOCTYPE:
                        /*doctypeloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                InitDoctypeFields();
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before DOCTYPE name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                                        goto breakDoctypeloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Parse error.
                                         */
                                        ErrMissingSpaceBeforeDoctypeName();
                                        /*
                                         * Reconsume the current character in the before
                                         * DOCTYPE name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto breakDoctypeloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypeloop:
                            goto case TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s53_BEFORE_DOCTYPE_NAME:
                        /*beforedoctypenameloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before DOCTYPE name state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrNamelessDoctype();
                                        /*
                                         * Create a new DOCTYPE token. Set its
                                         * force-quirks flag to on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit the token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Create a
                                             * new DOCTYPE token. Set the token's name
                                             * to the lowercase version of the input
                                             * character (add 0x0020 to the character's
                                             * code point).
                                             */
                                            c += (char)0x20;
                                        }
                                        /* Anything else Create a new DOCTYPE token. */
                                        /*
                                         * Set the token's name to the current
                                         * input character.
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * Switch to the DOCTYPE name state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s54_DOCTYPE_NAME;
                                        goto breakBeforedoctypenameloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakBeforedoctypenameloop:
                            goto case TokenizerState.s54_DOCTYPE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s54_DOCTYPE_NAME:
                        /*doctypenameloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        StrBufToDoctypeName();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s55_AFTER_DOCTYPE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the after DOCTYPE name state.
                                         */
                                        StrBufToDoctypeName();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s55_AFTER_DOCTYPE_NAME;
                                        goto breakDoctypenameloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        StrBufToDoctypeName();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * U+0041 LATIN CAPITAL LETTER A through to
                                         * U+005A LATIN CAPITAL LETTER Z Append the
                                         * lowercase version of the input character (add
                                         * 0x0020 to the character's code point) to the
                                         * current DOCTYPE token's name.
                                         */
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            c += (char)0x0020;
                                        }
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * name.
                                         */
                                        AppendStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE name state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypenameloop:
                            goto case TokenizerState.s55_AFTER_DOCTYPE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s55_AFTER_DOCTYPE_NAME:
                        /*afterdoctypenameloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the after DOCTYPE name state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case 'p':
                                    case 'P':
                                        index = 0;
                                        //state = Transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
                                        state = TokenizerState.DOCTYPE_UBLIC;

                                        goto breakAfterdoctypenameloop;
                                    // goto continueStateloop;
                                    case 's':
                                    case 'S':
                                        index = 0;
                                        //state = Transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
                                        state = TokenizerState.DOCTYPE_YSTEM;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Otherwise, this is the parse error.
                                         */
                                        BogusDoctype();

                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;

                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakAfterdoctypenameloop:
                            goto case TokenizerState.DOCTYPE_UBLIC;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.DOCTYPE_UBLIC:
                        /*doctypeublicloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * If the six characters starting from the current input
                                 * character are an ASCII case-insensitive match for the
                                 * word "PUBLIC", then consume those characters and
                                 * switch to the before DOCTYPE public identifier state.
                                 */
                                if (index < 5)
                                { // UBLIC.Length
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded != Tokenizer.UBLIC[index])
                                    {
                                        BogusDoctype();
                                        // forceQuirks = true;
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    //state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
                                    state = TokenizerState.s56_AFTER_DOCTYPE_PUBLIC_KEYWORD;
                                    //reconsume = true;
                                    reader.StepBack();

                                    goto breakDoctypeublicloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypeublicloop:
                            goto case TokenizerState.s56_AFTER_DOCTYPE_PUBLIC_KEYWORD;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s56_AFTER_DOCTYPE_PUBLIC_KEYWORD:
                        /*afterdoctypepublickeywordloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before DOCTYPE public
                                         * identifier state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
                                        goto breakAfterdoctypepublickeywordloop;
                                    // FALL THROUGH continue stateloop
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Parse Error.
                                         */
                                        ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's public identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Parse Error.
                                         */
                                        ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's public identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s59_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    case '>':
                                        /* U+003E GREATER-THAN SIGN (>) Parse error. */
                                        ErrExpectedPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakAfterdoctypepublickeywordloop:
                            goto case TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
                        /*beforedoctypepublicidentifierloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before DOCTYPE public identifier
                                         * state.
                                         */
                                        continue;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                         * token's public identifier to the empty string
                                         * (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                                        goto breakBeforedoctypepublicidentifierloop;
                                    // goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                         * public identifier to the empty string (not
                                         * missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s59_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    case '>':
                                        /* U+003E GREATER-THAN SIGN (>) Parse error. */
                                        ErrExpectedPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakBeforedoctypepublicidentifierloop:
                            goto case TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
                        /*doctypepublicidentifierdoublequotedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Switch to the after
                                         * DOCTYPE public identifier state.
                                         */
                                        publicIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                                        goto breakDoctypepublicidentifierdoublequotedloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrGtInPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        publicIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * public identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE public identifier
                                         * (double-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypepublicidentifierdoublequotedloop:
                            goto case TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
                        /*afterdoctypepublicidentifierloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
                                        state = TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the between DOCTYPE public and
                                         * system identifiers state.
                                         */
                                        //state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
                                        state = TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;

                                        goto breakAfterdoctypepublicidentifierloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Parse error.
                                         */
                                        ErrNoSpaceBetweenPublicAndSystemIds();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Parse error.
                                         */
                                        ErrNoSpaceBetweenPublicAndSystemIds();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    default:
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakAfterdoctypepublicidentifierloop:
                            goto case TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
                        /*
                         * Between DOCTYPE public and system identifiers state.
                         *
                         * Reads characters one at a time from the reader:
                         *  - whitespace is skipped,
                         *  - '>' emits the DOCTYPE token and switches to the data state,
                         *  - a quote character clears the long string buffer and starts
                         *    the matching quoted system identifier state,
                         *  - anything else calls BogusDoctype() and switches to the
                         *    bogus DOCTYPE state.
                         */
                        /*betweendoctypepublicandsystemidentifiersloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // State is left unchanged here; the state loop is
                                        // exited right after SilentCarriageReturn().
                                        // NOTE(review): SilentCarriageReturn() is defined
                                        // elsewhere; its exact bookkeeping is not visible here.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the between DOCTYPE public and system
                                         * identifiers state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                         * token's system identifier to the empty string
                                         * (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        // Leave the read loop and jump straight to the
                                        // double-quoted case through the label below,
                                        // instead of going back through continueStateloop.
                                        goto breakBetweendoctypepublicandsystemidentifiersloop;
                                    // goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                         * system identifier to the empty string (not
                                         * missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    default:
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // reader.ReadNext returned false (eof): leave the state loop
                            // with the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit jump that stands in for a switch fall-through into
                        // the double-quoted system identifier case.
                        breakBetweendoctypepublicandsystemidentifiersloop:
                            goto case TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
                        /*
                         * DOCTYPE system identifier (double-quoted) state.
                         *
                         * Accumulates characters into the long string buffer until the
                         * closing '"' is read, then stores the buffer contents in
                         * systemIdentifier. A '>' before the closing quote is reported
                         * as an error, sets forceQuirks, and still emits the DOCTYPE
                         * token with whatever was accumulated so far.
                         */
                        /*doctypesystemidentifierdoublequotedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Switch to the after
                                         * DOCTYPE system identifier state.
                                         */
                                        systemIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s66_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrGtInSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        // The partially read identifier is captured before
                                        // the token is emitted.
                                        systemIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        // The state loop is exited after the carriage
                                        // return is handed to the long string buffer; the
                                        // state itself is not changed.
                                        // NOTE(review): how AppendLongStrBufCarriageReturn()
                                        // normalizes CR / CRLF is not visible here — confirm.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // U+0000 NULL is replaced by U+FFFD REPLACEMENT
                                        // CHARACTER and then appended by the default branch.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * system identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            // reader.ReadNext returned false (eof): leave the state loop
                            // with the state unchanged.
                            goto breakStateloop;
                        }
                    // next 2 lines were unreachable; commented out
                    //breakDoctypesystemidentifierdoublequotedloop:
                    //	goto case TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s66_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
                        /*
                         * After DOCTYPE system identifier state.
                         *
                         * Skips whitespace; '>' emits the DOCTYPE token and switches to
                         * the data state; any other character calls
                         * BogusDoctypeWithoutQuirks() and continues in the bogus DOCTYPE
                         * state.
                         */
                        /*afterdoctypesystemidentifierloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // State is left unchanged; the state loop is exited
                                        // right after SilentCarriageReturn().
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the after DOCTYPE system identifier state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Switch to the bogus DOCTYPE state. (This does
                                         * not set the DOCTYPE token's force-quirks flag
                                         * to on.)
                                         */
                                        BogusDoctypeWithoutQuirks();
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        // Jump straight to the bogus DOCTYPE case through
                                        // the label below instead of re-dispatching via
                                        // continueStateloop.
                                        goto breakAfterdoctypesystemidentifierloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // reader.ReadNext returned false (eof): leave the state loop
                            // with the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit jump that stands in for a switch fall-through into
                        // the bogus DOCTYPE case.
                        breakAfterdoctypesystemidentifierloop:
                            goto case TokenizerState.s67_BOGUS_DOCTYPE;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s67_BOGUS_DOCTYPE:
                        /*
                         * Bogus DOCTYPE state.
                         *
                         * Discards every character until '>' is read, at which point
                         * the DOCTYPE token is emitted and the tokenizer switches to
                         * the data state.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit that
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        // State is left unchanged; the state loop is exited
                                        // right after SilentCarriageReturn().
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    // '\n' is listed explicitly but shares the default
                                    // branch: it is ignored like any other character.
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Stay in the bogus DOCTYPE
                                         * state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            // reader.ReadNext returned false (eof): leave the state loop
                            // with the state unchanged.
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.DOCTYPE_YSTEM:
                        /*
                         * Matches the characters of YSTEM one at a time, ASCII
                         * case-insensitively, using the shared `index` counter.
                         * NOTE(review): `index` and `YSTEM` are declared outside this
                         * section; presumably the leading 'S' of "SYSTEM" was consumed
                         * by an earlier state and `index` starts at 0 — confirm.
                         */
                        /*doctypeystemloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Otherwise, if the six characters starting from the
                                 * current input character are an ASCII case-insensitive
                                 * match for the word "SYSTEM", then consume those
                                 * characters and switch to the before DOCTYPE system
                                 * identifier state.
                                 */
                                if (index < 5)
                                { // YSTEM.Length
                                    // Fold ASCII upper-case letters to lower case by
                                    // adding 0x20 before comparing.
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded != YSTEM[index])
                                    {
                                        // Mismatch: treat the DOCTYPE as bogus and step
                                        // the reader back.
                                        // NOTE(review): StepBack() presumably makes the
                                        // bogus DOCTYPE state re-read this character
                                        // (the old "reconsume" flag) — confirm.
                                        BogusDoctype();
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        reader.StepBack();
                                        //reconsume = true;
                                        goto continueStateloop;
                                    }
                                    // Character matched: advance the match position.
                                    // The state is unchanged, so the jump to
                                    // continueStateloop presumably re-enters this case
                                    // for the next character.
                                    index++;
                                    goto continueStateloop;
                                }
                                else
                                {
                                    // All five characters matched. The character just
                                    // read is not part of the keyword, so the reader is
                                    // stepped back before moving to the after DOCTYPE
                                    // system keyword state.
                                    //state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
                                    state = TokenizerState.s62_AFTER_DOCTYPE_SYSTEM_KEYWORD;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto breakDoctypeystemloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // reader.ReadNext returned false (eof): leave the state loop
                            // with the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit jump that stands in for a switch fall-through into
                        // the after DOCTYPE system keyword case.
                        breakDoctypeystemloop:
                            goto case TokenizerState.s62_AFTER_DOCTYPE_SYSTEM_KEYWORD;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s62_AFTER_DOCTYPE_SYSTEM_KEYWORD:
                        /*
                         * After DOCTYPE system keyword state.
                         *
                         * Handles the character that follows the SYSTEM keyword:
                         *  - whitespace switches to the before DOCTYPE system
                         *    identifier state,
                         *  - a quote directly after the keyword is reported as an
                         *    error, then starts the matching quoted system identifier
                         *    state with an empty long string buffer,
                         *  - '>' reports the missing system identifier, sets
                         *    forceQuirks, emits the DOCTYPE token and switches to the
                         *    data state,
                         *  - anything else calls BogusDoctype() and switches to the
                         *    bogus DOCTYPE state.
                         */
                        /*afterdoctypesystemkeywordloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        // The new state is stored before leaving the
                                        // state loop, so the before DOCTYPE system
                                        // identifier state is the one recorded on exit.
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;

                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before DOCTYPE system
                                         * identifier state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
                                        // Jump straight to the next case through the
                                        // label below instead of re-dispatching via
                                        // continueStateloop.
                                        goto breakAfterdoctypesystemkeywordloop;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Parse Error.
                                         */
                                        ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Parse Error.
                                         */
                                        ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         * The SYSTEM keyword has been read, so the item
                                         * that is missing here is the system identifier,
                                         * not the public identifier.
                                         */
                                        ErrExpectedSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // reader.ReadNext returned false (eof): leave the state loop
                            // with the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit jump that stands in for a switch fall-through into
                        // the before DOCTYPE system identifier case.
                        breakAfterdoctypesystemkeywordloop:
                            goto case TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
                        /*
                         * Before DOCTYPE system identifier state.
                         *
                         * Skips whitespace; a quote character clears the long string
                         * buffer and starts the matching quoted system identifier
                         * state; '>' reports the missing system identifier, sets
                         * forceQuirks, emits the DOCTYPE token and switches to the data
                         * state; anything else calls BogusDoctype() and switches to the
                         * bogus DOCTYPE state.
                         */
                        /*beforedoctypesystemidentifierloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // State is left unchanged; the state loop is exited
                                        // right after SilentCarriageReturn().
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before DOCTYPE system identifier
                                         * state.
                                         */
                                        continue;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                         * token's system identifier to the empty string
                                         * (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                         * system identifier to the empty string (not
                                         * missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        // Jump straight to the single-quoted case through
                                        // the label below instead of re-dispatching via
                                        // continueStateloop.
                                        goto breakBeforedoctypesystemidentifierloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /* U+003E GREATER-THAN SIGN (>) Parse error. */
                                        ErrExpectedSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // reader.ReadNext returned false (eof): leave the state loop
                            // with the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit jump that stands in for a switch fall-through into
                        // the single-quoted system identifier case.
                        breakBeforedoctypesystemidentifierloop:
                            goto case TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the after
                                         * DOCTYPE system identifier state.
                                         */
                                        systemIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s66_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                                        goto continueStateloop;
                                    case '>':
                                        ErrGtInSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        systemIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * system identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                            // XXX reorder point

                        }
                    case TokenizerState.s59_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the after
                                         * DOCTYPE public identifier state.
                                         */
                                        publicIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                                        goto continueStateloop;
                                    case '>':
                                        ErrGtInPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        publicIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * public identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE public identifier
                                         * (single-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.PROCESSING_INSTRUCTION:
                        //processinginstructionloop: 
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '?':
                                        //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,reconsume, pos);
                                        state = TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK;

                                        break;
                                    // continue stateloop;
                                    default:
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    //breakProcessingInstructionLoop: 
                    case TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;

                            }

                            switch (c)
                            {
                                case '>':
                                    //state = Transition(state, Tokenizer.DATA,reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    continue;
                                default:
                                    //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION,reconsume, pos);
                                    state = TokenizerState.PROCESSING_INSTRUCTION;
                                    continue;
                            }

                        }
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

        breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }
Example #45
0
        /// <summary>
        /// Tries to build a <typeparamref name="T"/> from a list of command-line words.
        /// Words that start with one of <c>args.CommandLineChars</c> are looked up as named
        /// tokens, <c>@file</c> words are expanded in place as response files (when
        /// <c>args.AllowResponseFile</c> is set) and every other word is handed to the
        /// placed tokens of the definition, then to its rest token.
        /// </summary>
        /// <param name="items">The command-line words. The list is copied, never modified.</param>
        /// <param name="args">
        /// The parsing options. <c>ErrorMessage</c> is assigned on the failure paths inside
        /// the argument loop.
        /// </param>
        /// <param name="to">
        /// Receives <c>state.Instance</c> on success; <c>null</c> when parsing fails.
        /// </param>
        /// <returns>
        /// <c>true</c> when every word was accepted and the state reports itself complete;
        /// otherwise <c>false</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="items"/> or <paramref name="args"/> is <c>null</c>.
        /// </exception>
        internal static bool TryParse(IList<string> items, TokenizerArgs args, out T to)
        {
            if (items == null)
            {
                throw new ArgumentNullException("items");
            }
            else if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            // Work on a private copy: response files splice extra words into the list.
            List<string> cArgs = new List<string>(items);

            using (TokenizerState<T> state = Tokenizer.NewState<T>(args))
            {
                TokenizerDefinition definition = state.Definition;
                to = null;

                // Once set, no further word is interpreted as a named argument or response file.
                bool atEnd = false;
                char[] checkChars = args.PlusMinSuffixArguments
                    ? new char[] { args.ArgumentValueSeparator, '+', '-' }
                    : new char[] { args.ArgumentValueSeparator };

                int nPlaced = 0;
                for (int i = 0; i < cArgs.Count; i++)
                {
                    string a = cArgs[i];

                    if (!atEnd && (a.Length > 1) && args.CommandLineChars.Contains(a[0]))
                    {
                        bool twoStart = a[0] == a[1];
                        if (a.Length == 2 && twoStart)
                        {
                            // A doubled command-line character on its own (e.g. "--") ends
                            // named-argument parsing.
                            if (!definition.HasPlacedArguments)
                            {
                                args.ErrorMessage = TokenizerMessages.NoPlacedArgumentsDefined;
                                return false;
                            }

                            atEnd = true;

                            // The marker itself is consumed; it must not be evaluated as a
                            // placed argument.
                            continue;
                        }

                        int aFrom = twoStart ? 2 : 1;
                        int aTo = args.AllowDirectArgs ? a.IndexOfAny(checkChars, aFrom) : -1;

                        // NOTE(review): the text from the separator at aTo onwards is not passed
                        // to the token anywhere in this method - confirm whether that is intended.
                        string item = (aTo > 0) ? a.Substring(aFrom, aTo - aFrom) : a.Substring(aFrom);

                        TokenItem token;

                        if (definition.TryGetToken(item, args.CaseSensitive, out token))
                        {
                            if (!token.RequiresValue)
                            {
                                token.Evaluate(null, state);
                            }
                            else if (i + 1 < cArgs.Count)
                            {
                                // The value is the next word; consume it together with the name.
                                token.Evaluate(cArgs[++i], state);
                            }
                            else
                            {
                                args.ErrorMessage = TokenizerMessages.RequiredArgumentValueIsMissing;
                                return false;
                            }

                            continue;
                        }

                        // Look for a shorter argument: try ever shorter prefixes of the word as
                        // the token name, with the remainder offered as a direct value.
                        for (int ii = item.Length - 1; ii > 0; ii--)
                        {
                            string directValue = item.Substring(ii);

                            if (definition.TryGetToken(item.Substring(0, ii), args.CaseSensitive, out token) &&
                                token.AllowDirectValue(directValue, state))
                            {
                                token.EvaluateDirect(directValue, state);
                                break;
                            }

                            token = null;
                        }

                        if (token == null)
                        {
                            args.ErrorMessage = string.Format(CultureInfo.InvariantCulture, TokenizerMessages.UnknownArgumentX, a);
                            return false;
                        }

                        // The token already received its value through EvaluateDirect, so the
                        // following word is left alone and no "value expected" error is raised.
                        continue;
                    }
                    else if (!atEnd && args.AllowResponseFile && a.Length > 1 && a[0] == '@')
                    {
                        string file = a.Substring(1);

                        if (!File.Exists(file))
                        {
                            args.ErrorMessage = string.Format(CultureInfo.InvariantCulture, TokenizerMessages.ResponseFileXNotFound, file);
                            return false;
                        }

                        using (StreamReader sr = File.OpenText(file))
                        {
                            string line;
                            int n = i + 1; // Insert directly after the @file word, keeping file order.
                            while (null != (line = sr.ReadLine()))
                            {
                                line = line.TrimStart();

                                // Skip empty lines and '#' comment lines. One-character lines
                                // are valid input and are no longer dropped.
                                if (line.Length > 0 && line[0] != '#')
                                {
                                    foreach (string word in Tokenizer.GetCommandlineWords(line))
                                    {
                                        cArgs.Insert(n++, word);
                                    }
                                }
                            }
                        }

                        continue;
                    }
                    else if (!args.AllowNamedBetweenPlaced)
                    {
                        // The first placed word ends named-argument parsing.
                        atEnd = true;
                    }

                    if (definition.HasPlacedArguments)
                    {
                        if (nPlaced < definition.PlacedItems.Count)
                        {
                            definition.PlacedItems[nPlaced].Evaluate(cArgs[i], state);
                            nPlaced++;
                        }
                        else if (definition.RestToken != null)
                        {
                            definition.RestToken.Evaluate(cArgs[i], state);
                        }
                        else
                        {
                            args.ErrorMessage = string.Format(CultureInfo.InvariantCulture, TokenizerMessages.UnknownArgumentX, cArgs[i]);
                            return false;
                        }
                    }
                }

                // No error message is assigned here when the state is incomplete.
                if (!state.IsComplete)
                {
                    return false;
                }

                to = state.Instance;
                return true;
            }
        }
 /// <summary>
 /// Creates a rule for a single tokenizer state. The state is wrapped in a one-element
 /// array and forwarded, together with the other arguments unchanged, to another
 /// constructor overload of this class.
 /// </summary>
 /// <param name="state">The one state, passed on as a single-element array.</param>
 /// <param name="applicableData">Forwarded unchanged.</param>
 /// <param name="type">Forwarded unchanged.</param>
 /// <param name="stateChange">Forwarded unchanged.</param>
 public TokenizerRule(TokenizerState state, IEnumerable<string> applicableData, TokenType type, Action<Stack<TokenizerState>> stateChange)
     : this(new TokenizerState[] { state }, applicableData, type, stateChange)
 {
 }
Example #47
0
        /// <summary>
        /// Reads the next token from the input, dispatching on the current tokenizer state.
        /// </summary>
        /// <returns>
        /// The next token, or <c>null</c> when there is nothing left to read.
        /// </returns>
        public Token ReadNextToken()
        {
            if(position == input.Length)
            {
                return null;
            }

            if(state != TokenizerState.PositionalValues && input[position] == EscapeMarker)
            {
                // Read and discard the string at the escape marker; from here on everything
                // is produced as a positional value.
                ReadNextString();
                state = TokenizerState.PositionalValues;

                // Re-apply the end-of-input check from the top of the method in case the
                // marker was the last thing in the input.
                if(position == input.Length)
                {
                    return null;
                }
            }

            if(state == TokenizerState.PositionalValues)
            {
                // Capture the location BEFORE reading, like the other token sites below.
                // Arguments are evaluated left to right, so calling GetCurrentPosition()
                // inside the constructor call would see the state after the read.
                var positionalLocation = GetCurrentPosition();
                var positional = ReadNextString();
                return (positional == null) ? null : new PositionalArgumentToken(positional, positionalLocation);
            }

            var location = GetCurrentPosition();
            var c = ReadChar();
            if(state == TokenizerState.ShortName)
            {
                if(c == Tokenizer.EndOfString)
                {
                    // End of the short-name run: go back to normal parsing and retry.
                    state = TokenizerState.Normal;
                    return ReadNextToken();
                }

                return new ShortNameToken(c, location);
            }

            if(c != FlagCharacter)
            {
                var value = ReadNextString();
                return (value == null) ? null : new PositionalArgumentToken(value, location);
            }

            if(PeekChar() == FlagCharacter)
            {
                // The second flag character was only peeked, so consume it now.
                ReadChar();
                return new LongNameToken(ReadUntilChar(Tokenizer.EndOfString, AssignmentOperator), location);
            }

            // A single flag character: the following characters are read as short names.
            state = TokenizerState.ShortName;
            return ReadNextToken();
        }
 /// <summary>
 /// Creates a rule for a single tokenizer state and a single data string. Each is wrapped
 /// in a one-element array and forwarded, together with the other arguments unchanged,
 /// to another constructor overload of this class.
 /// </summary>
 /// <param name="state">The one state, passed on as a single-element array.</param>
 /// <param name="applicableData">The one data string, passed on as a single-element array.</param>
 /// <param name="type">Forwarded unchanged.</param>
 /// <param name="stateChange">Forwarded unchanged.</param>
 public TokenizerRule(TokenizerState state, string applicableData, TokenType type, Action<Stack<TokenizerState>> stateChange)
     : this(new TokenizerState[] { state }, new string[] { applicableData }, type, stateChange)
 {
 }
        void StateLoop3_Comment(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were and LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {

                    case TokenizerState.s45_MARKUP_DECLARATION_OPEN:
                        /*markupdeclarationopenloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * If the next two characters are both U+002D
                                 * HYPHEN-MINUS characters (-), consume those two
                                 * characters, create a comment token whose data is the
                                 * empty string, and switch to the comment start state.
                                 * 
                                 * Otherwise, if the next seven characters are an ASCII
                                 * case-insensitive match for the word "DOCTYPE", then
                                 * consume those characters and switch to the DOCTYPE
                                 * state.
                                 * 
                                 * Otherwise, if the insertion mode is
                                 * "in foreign content" and the current node is not an
                                 * element in the HTML namespace and the next seven
                                 * characters are an case-sensitive match for the string
                                 * "[CDATA[" (the five uppercase TokenizerState.letters "CDATA" with a
                                 * U+005B LEFT SQUARE BRACKET character before and
                                 * after), then consume those characters and switch to
                                 * the CDATA section state.
                                 * 
                                 * Otherwise, is is a parse error. Switch to the bogus
                                 * comment state. The next character that is consumed,
                                 * if any, is the first character that will be in the
                                 * comment.
                                 */
                                switch (c)
                                {
                                    case '-':
                                        ClearLongStrBufAndAppend(c);
                                        //state = Transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
                                        state = TokenizerState.MARKUP_DECLARATION_HYPHEN;
                                        goto breakMarkupdeclarationopenloop;
                                    // goto continueStateloop;
                                    case 'd':
                                    case 'D':
                                        ClearLongStrBufAndAppend(c);
                                        index = 0;
                                        //state = Transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
                                        state = TokenizerState.MARKUP_DECLARATION_OCTYPE;
                                        goto continueStateloop;
                                    case '[':
                                        if (TokenListener.IsCDataSectionAllowed)
                                        {
                                            ClearLongStrBufAndAppend(c);
                                            index = 0;
                                            //state = Transition(state, Tokenizer.CDATA_START, reconsume, pos);
                                            state = TokenizerState.CDATA_START;
                                            goto continueStateloop;
                                        }
                                        else
                                        {
                                            // else fall through
                                            goto default;
                                        }
                                    default:
                                        ErrBogusComment();
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakMarkupdeclarationopenloop:
                            goto case TokenizerState.MARKUP_DECLARATION_HYPHEN;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.MARKUP_DECLARATION_HYPHEN:
                        /*markupdeclarationhyphenloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\u0000':
                                        goto breakStateloop;
                                    case '-':
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.COMMENT_START, reconsume, pos);
                                        state = TokenizerState.s46_COMMENT_START;
                                        goto breakMarkupdeclarationhyphenloop;
                                    // goto continueStateloop;
                                    default:
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakMarkupdeclarationhyphenloop:
                            goto case TokenizerState.s46_COMMENT_START;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s46_COMMENT_START:
                        /*commentstartloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Switch to the comment
                                         * start dash state.
                                         */
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
                                        state = TokenizerState.s47_COMMENT_START_DASH;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrPrematureEndOfComment();
                                        /* Emit the comment token. */
                                        EmitComment(0);
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;

                                        goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        // state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;

                                        goto breakCommentstartloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the input character to
                                         * the comment token's data.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Switch to the comment state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;

                                        goto breakCommentstartloop;
                                    // goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakCommentstartloop:
                            goto case TokenizerState.s48_COMMENT;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s48_COMMENT:
                        /*commentloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Switch to the comment
                                         * end dash state
                                         */
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                                        state = TokenizerState.s49_COMMENT_END_DASH;
                                        goto breakCommentloop;
                                    // goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the input character to
                                         * the comment token's data.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the comment state.
                                         */
                                        continue;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakCommentloop:
                            goto case TokenizerState.s49_COMMENT_END_DASH;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s49_COMMENT_END_DASH:
                        /*commentenddashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Switch to the comment
                                         * end state
                                         */
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                                        state = TokenizerState.s50_COMMENT_END;
                                        goto breakCommentenddashloop;
                                    // goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        goto default;
                                    // fall thru
                                    default:
                                        /*
                                         * Anything else Append a U+002D HYPHEN-MINUS
                                         * (-) character and the input character to the
                                         * comment token's data.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Switch to the comment state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakCommentenddashloop:
                            goto case TokenizerState.s50_COMMENT_END;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s50_COMMENT_END:
                        /*commentendloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the comment
                                         * token.
                                         */
                                        EmitComment(2);
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '-':
                                        /* U+002D HYPHEN-MINUS (-) Parse error. */
                                        /*
                                         * Append a U+002D HYPHEN-MINUS (-) character to
                                         * the comment token's data.
                                         */
                                        AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
                                        /*
                                         * Stay in the comment end state.
                                         */
                                        continue;
                                    case '\r':
                                        AdjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AdjustDoubleHyphenAndAppendToLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                    case '!':
                                        ErrHyphenHyphenBang();
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
                                        state = TokenizerState.s51_COMMENT_END_BANG;
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Append two U+002D HYPHEN-MINUS (-) characters
                                         * and the input character to the comment
                                         * token's data.
                                         */
                                        AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
                                        /*
                                         * Switch to the comment state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s51_COMMENT_END_BANG:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the comment
                                         * token.
                                         */
                                        EmitComment(3);
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '-':
                                        /*
                                         * Append two U+002D HYPHEN-MINUS (-) characters
                                         * and a U+0021 EXCLAMATION MARK (!) character
                                         * to the comment token's data.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Switch to the comment end dash state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                                        state = TokenizerState.s49_COMMENT_END_DASH;
                                        goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append two U+002D HYPHEN-MINUS
                                         * (-) characters, a U+0021 EXCLAMATION MARK (!)
                                         * character, and the input character to the
                                         * comment token's data. Switch to the comment
                                         * state.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Switch to the comment state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s47_COMMENT_START_DASH:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //-------------------------------
                                //eof
                                goto breakStateloop;
                            }
                            //----------------------

                            switch (c)
                            {
                                case '-':
                                    /*
                                     * U+002D HYPHEN-MINUS (-) Switch to the comment end
                                     * state
                                     */
                                    AppendLongStrBuf(c);
                                    //state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                                    state = TokenizerState.s50_COMMENT_END;
                                    goto continueStateloop;
                                case '>':
                                    ErrPrematureEndOfComment();
                                    /* Emit the comment token. */
                                    EmitComment(1);
                                    /*
                                     * Switch to the data state.
                                     */
                                    //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                case '\r':
                                    AppendLongStrBufCarriageReturn();
                                    //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.s48_COMMENT;
                                    goto breakStateloop;
                                case '\n':
                                    AppendLongStrBufLineFeed();
                                    //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.s48_COMMENT;
                                    goto continueStateloop;
                                case '\u0000':
                                    c = '\uFFFD';
                                    // fall thru
                                    goto default;
                                default:
                                    /*
                                     * Append a U+002D HYPHEN-MINUS character (-) and
                                     * the current input character to the comment
                                     * token's data.
                                     */
                                    AppendLongStrBuf(c);
                                    /*
                                     * Switch to the comment state.
                                     */
                                    //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.s48_COMMENT;
                                    goto continueStateloop;
                            }
                        }
                    // XXX reorder point
                    case TokenizerState.CDATA_START:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                if (index < 6)
                                { // CDATA_LSQB.Length
                                    if (c == Tokenizer.CDATA_LSQB[index])
                                    {
                                        AppendLongStrBuf(c);
                                    }
                                    else
                                    {
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    reader.StartCollect(); // start coalescing
                                    //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                    state = TokenizerState.s68_CDATA_SECTION;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto case TokenizerState.s68_CDATA_SECTION;
                                    //break; // FALL THROUGH goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                            //------------------------------------

                        }
                    // WARNING: FALLTHRU case transition (CDATA_START jumps straight into s68_CDATA_SECTION via goto case) - DON'T REORDER
                    case TokenizerState.s68_CDATA_SECTION:
                        /*cdatasectionloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB;
                                        goto breakCdatasectionloop; // FALL THROUGH
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        continue;
                                }
                            }
                            goto breakStateloop;
                        //------------------------------------
                        breakCdatasectionloop:
                            goto case TokenizerState.CDATA_RSQB;
                        }
                    // WARNING: FALLTHRU case transition (s68_CDATA_SECTION jumps straight into CDATA_RSQB via goto case) - DON'T REORDER
                    case TokenizerState.CDATA_RSQB:
                        /*cdatarsqb:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        //state = Transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB_RSQB;
                                        goto breakCdatarsqb;
                                    default:
                                        TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 1);
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                        state = TokenizerState.s68_CDATA_SECTION;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------ 
                        breakCdatarsqb:
                            goto case TokenizerState.CDATA_RSQB_RSQB;
                        }
                    // WARNING: FALLTHRU case transition (CDATA_RSQB jumps straight into CDATA_RSQB_RSQB via goto case) - DON'T REORDER
                    case TokenizerState.CDATA_RSQB_RSQB:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;
                            }
                            switch (c)
                            {
                                case '>':
                                    //cstart = pos + 1;
                                    reader.SkipOneAndStartCollect();
                                    //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                default:
                                    TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 2);
                                    reader.StartCollect();
                                    //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                    state = TokenizerState.s68_CDATA_SECTION;
                                    reader.StepBack();
                                    //reconsume = true;
                                    goto continueStateloop;

                            }
                        }

                    // XXX reorder point
                    // BEGIN HOTSPOT WORKAROUND
                    case TokenizerState.s44_BOGUS_COMMENT:
                        /*boguscommentloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Consume every character up to and including the first
                                 * U+003E GREATER-THAN SIGN character (>) or the end of
                                 * the file (EOF), whichever comes first. Emit a comment
                                 * token whose data is the concatenation of all the
                                 * characters starting from and including the character
                                 * that caused the state machine to switch into the
                                 * bogus comment state, up to and including the
                                 * character immediately before the last consumed
                                 * character (i.e. up to the character just before the
                                 * U+003E or EOF character). (If the comment was started
                                 * by the end of the file (EOF), the token is empty.)
                                 * 
                                 * Switch to the data state.
                                 * 
                                 * If the end of the file was reached, reconsume the EOF
                                 * character.
                                 */
                                switch (c)
                                {
                                    case '>':
                                        EmitComment(0);
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '-':
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
                                        state = TokenizerState.BOGUS_COMMENT_HYPHEN;
                                        goto breakBoguscommentloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        AppendLongStrBuf(c);
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakBoguscommentloop:
                            goto case TokenizerState.BOGUS_COMMENT_HYPHEN;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.BOGUS_COMMENT_HYPHEN:
                        /*boguscommenthyphenloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '>':
                                        // [NOCPP[
                                        MaybeAppendSpaceToBogusComment();
                                        // ]NOCPP]
                                        EmitComment(0);
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '-':
                                        AppendSecondHyphenToBogusComment();
                                        goto continueBoguscommenthyphenloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                }
                            //------------------------------------
                            continueBoguscommenthyphenloop:
                                continue;
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

       breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }
Example #50
0
            /// <summary>
            /// Splits JSON-like <paramref name="text"/> into a flat array of tokens.
            /// </summary>
            /// <remarks>
            /// Quoted strings (single or double quotes) are emitted including their quotes, with escape
            /// sequences left undecoded. Runs of letters, digits, '_', '.' and '-' are emitted as one word
            /// token. Any other non-whitespace character is emitted as a one-character token and left for
            /// the parser to accept or reject. Each token is constructed with the 1-based line and column
            /// of its first character.
            /// </remarks>
            /// <param name="text">The text to tokenize.</param>
            /// <param name="allowComments">
            /// When true, <c>/* ... */</c> comments are skipped and produce no token; when false the
            /// slash is emitted as an ordinary token.
            /// </param>
            /// <returns>The tokens in the order they appear in the text.</returns>
            /// <exception cref="JsonParserException">
            /// A string or a comment is still open when the end of the text is reached.
            /// </exception>
            private static Token[] Init(string text, bool allowComments)
            {
                List<Token> tokens = new List<Token>();

                // This is a hack to prevent the need to "close" token/state types: the trailing newline
                // always terminates a pending word, so if the state is not NONE at the end, then either
                // a comment or a string was left open.
                text += "\n";

                int length = text.Length;

                // Line and column of every character, so a token can report where it started.
                int[] lines   = new int[length];
                int[] columns = new int[length];
                int   line    = 1;
                int   column  = 1;

                for (int i = 0; i < length; ++i)
                {
                    lines[i]   = line;
                    columns[i] = column++;
                    if (text[i] == '\n')
                    {
                        line++;
                        column = 1;
                    }
                }

                TokenizerState state      = TokenizerState.NONE;
                int            tokenStart = 0;
                char           stringType = '\0'; // quote character that opened the current string

                for (int i = 0; i < length; ++i)
                {
                    char c = text[i];
                    switch (state)
                    {
                    case TokenizerState.NONE:
                        switch (c)
                        {
                        case ' ':
                        case '\r':
                        case '\n':
                        case '\t':
                            // skip whitespace
                            break;

                        case '"':
                        case '\'':
                            stringType = c;
                            tokenStart = i;
                            state      = TokenizerState.STRING;
                            break;

                        case '/':
                            if (allowComments && i + 1 < length && text[i + 1] == '*')
                            {
                                state = TokenizerState.COMMENT;
                                i++;         // skip the '*' too: do not allow /*/ as a self-closing comment.
                            }
                            else
                            {
                                // Go ahead and add as a token and let the parser throw the exception.
                                tokens.Add(new Token("/", lines[i], columns[i]));
                            }
                            break;

                        default:
                            if (IsWordCharacter(c))
                            {
                                // numbers, unquoted strings, nulls, booleans
                                state      = TokenizerState.WORD;
                                tokenStart = i;
                            }
                            else
                            {
                                // Either a JSON syntax character (like a bracket, comma, or colon) or an error.
                                tokens.Add(new Token(c.ToString(), lines[i], columns[i]));
                            }
                            break;
                        }
                        break;

                    case TokenizerState.COMMENT:
                        if (c == '*' && i + 1 < length && text[i + 1] == '/')
                        {
                            ++i;     // skip the slash
                            state = TokenizerState.NONE;
                        }
                        break;

                    case TokenizerState.STRING:
                        if (c == stringType)
                        {
                            // The token keeps both the opening and the closing quote.
                            tokens.Add(new Token(text.Substring(tokenStart, i - tokenStart + 1), lines[tokenStart], columns[tokenStart]));
                            state = TokenizerState.NONE;
                        }
                        else if (c == '\\')
                        {
                            // Skip the escaped character so an escaped quote does not close the string.
                            i++;
                        }
                        break;

                    case TokenizerState.WORD:
                        if (!IsWordCharacter(c))
                        {
                            tokens.Add(new Token(text.Substring(tokenStart, i - tokenStart), lines[tokenStart], columns[tokenStart]));

                            // Re-examine the terminating character in the NONE state.
                            --i;
                            state = TokenizerState.NONE;
                        }
                        break;
                    }
                }

                if (state != TokenizerState.NONE)
                {
                    if (state == TokenizerState.COMMENT)
                    {
                        throw new JsonParserException("Unexpected EOF detected. A comment seems to be left unclosed.");
                    }
                    throw new JsonParserException("Unexpected EOF detected. A string seems to be left unclosed.");
                }

                return tokens.ToArray();
            }

            /// <summary>
            /// Determines whether <paramref name="c"/> can start or continue an unquoted word
            /// (numbers, unquoted strings, nulls, booleans): an ASCII letter or digit, '_', '.' or '-'.
            /// </summary>
            /// <param name="c">The character to classify.</param>
            /// <returns>true if the character belongs to a word; otherwise false.</returns>
            private static bool IsWordCharacter(char c)
            {
                return (c >= '0' && c <= '9') ||
                       (c >= 'a' && c <= 'z') ||
                       (c >= 'A' && c <= 'Z') ||
                       c == '_' ||
                       c == '.' ||
                       c == '-';
            }
        /// <summary>
        /// Gets the next token.
        /// </summary>
        /// <remarks>
        /// A token is either a run of text (including any quoted string literals it contains) or a
        /// comment. The tokenizer state is kept between calls, so a string literal or block comment
        /// left open at the end of the current characters is continued by a later call.
        /// </remarks>
        /// <returns>
        /// A <see cref="Token" /> or <c>null</c> at end of string.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// A block comment end marker was found outside a block comment.
        /// </exception>
        public Token GetNextToken()
        {
            this.valueBuilder.Clear();

            // Position at which this call started. While the position still equals this value,
            // the token being built has not advanced past its first character.
            var tokenStart = this.position;

            // State that decides the type of the emitted token. It normally is the state on entry,
            // but is updated when the token itself begins with a comment marker.
            var tokenState = this.State;

            while (this.position < this.lineChars.Length)
            {
                // Peek current position.
                var s1 = new string(this.lineChars[this.position], 1);

                // Peek 2 character string at current position (only 1 character when at the last one).
                var s2 = this.position + 1 < this.lineChars.Length
                             ? s1 + this.lineChars[this.position + 1]
                             : s1;

                switch (this.State)
                {
                case TokenizerState.None:

                    // Look for start of string literal
                    switch (s1)
                    {
                    case SingleQuote:

                        this.State = TokenizerState.SingleQuoteString;
                        this.Consume();
                        continue;

                    case DoubleQuote:

                        this.State = TokenizerState.DoubleQuoteString;
                        this.Consume();
                        continue;
                    }

                    // Look for start/end of comments
                    switch (s2)
                    {
                    case LineComment:

                        this.State = TokenizerState.LineComment;

                        if (this.position == tokenStart)
                        {
                            // The comment is the very first thing in this token: there is no
                            // preceding text to emit, so keep going and emit it as a comment.
                            tokenState = this.State;
                            continue;
                        }

                        // Emit the text collected so far; the comment is read by the next call.
                        break;

                    case BlockCommentStart:

                        this.State = TokenizerState.BlockComment;

                        if (this.position == tokenStart)
                        {
                            // The comment is the very first thing in this token: there is no
                            // preceding text to emit, so keep going and emit it as a comment.
                            tokenState = this.State;
                            continue;
                        }

                        // Emit the text collected so far; the comment is read by the next call.
                        break;

                    case BlockCommentEnd:

                        throw new InvalidOperationException("Found BlockCommentEnd when not within a block comment");

                    default:
                        this.Consume();
                        continue;
                    }

                    break;

                case TokenizerState.BlockComment:

                    // Look for block comment end
                    if (s2 == BlockCommentEnd)
                    {
                        // Take the end marker into the comment token, then emit it.
                        this.State = TokenizerState.None;
                        this.Consume(2);
                    }
                    else
                    {
                        this.Consume();
                        continue;
                    }

                    break;

                case TokenizerState.LineComment:

                    // While in a line comment, consume till end of string
                    this.Consume();
                    continue;

                case TokenizerState.SingleQuoteString:

                    if (s2 == QuoteQuote)
                    {
                        // QuoteQuote within single quote string literal is part of the string
                        this.Consume(2);
                        continue;
                    }

                    if (s1 == SingleQuote)
                    {
                        // End of string literal
                        this.State = TokenizerState.None;
                    }

                    this.Consume();
                    continue;

                case TokenizerState.DoubleQuoteString:

                    if (s1 == DoubleQuote)
                    {
                        // End of string literal
                        this.State = TokenizerState.None;
                    }

                    this.Consume();
                    continue;
                }

                // If we get here, the token is ready to be emitted
                break;
            }

            var value = this.valueBuilder.ToString();

            if (value.Length == 0)
            {
                // End of line
                if (this.State == TokenizerState.LineComment)
                {
                    // Thus end of line comment
                    this.State = TokenizerState.None;
                }

                return null;
            }

            if (tokenState == TokenizerState.BlockComment || tokenState == TokenizerState.LineComment)
            {
                return new Token(TokenType.Comment, value);
            }

            return new Token(TokenType.Text, value);
        }
        void StateLoop3_Tag(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were and LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {
                    case TokenizerState.s01_DATA:
                        /*dataloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in data state.
                                         */
                                        FlushChars();
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('\u0000');
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

                                        goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the tag
                                         * open state.
                                         */
                                        FlushChars();

                                        //state = Transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
                                        state = TokenizerState.s08_TAG_OPEN;
                                        goto breakDataloop; // FALL THROUGH continue
                                    // stateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the input character as a
                                         * character token.
                                         * 
                                         * Stay in the data state.
                                         */
                                        continue;
                                }
                            }


                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------
                        breakDataloop:
                            goto case TokenizerState.s08_TAG_OPEN;
                            //------------      
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s08_TAG_OPEN:
                        /*tagopenloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * The behavior of this state depends on the content
                                 * model flag.
                                 */

                                /*
                                 * If the content model flag is set to the PCDATA state
                                 * Consume the next input character:
                                 */
                                if (c >= 'A' && c <= 'Z')
                                {
                                    /*
                                     * U+0041 LATIN CAPITAL LETTER A through to U+005A
                                     * LATIN CAPITAL LETTER Z Create a new start tag
                                     * token,
                                     */
                                    endTag = false;
                                    /*
                                     * set its tag name to the lowercase TokenizerState.version of the
                                     * input character (add 0x0020 to the character's
                                     * code point),
                                     */
                                    ClearStrBufAndAppend((char)(c + 0x20));
                                    /* then switch to the tag name state. */
                                    //state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                    state = TokenizerState.s10_TAG_NAME;
                                    /*
                                     * (Don't emit the token yet; further details will
                                     * be filled in before it is emitted.)
                                     */
                                    goto breakTagopenloop;
                                    // goto continueStateloop;
                                }
                                else if (c >= 'a' && c <= 'z')
                                {
                                    /*
                                     * U+0061 LATIN SMALL LETTER A through to U+007A
                                     * LATIN SMALL LETTER Z Create a new start tag
                                     * token,
                                     */
                                    endTag = false;
                                    /*
                                     * set its tag name to the input character,
                                     */
                                    ClearStrBufAndAppend(c);
                                    /* then switch to the tag name state. */
                                    //state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                    state = TokenizerState.s10_TAG_NAME;
                                    /*
                                     * (Don't emit the token yet; further details will
                                     * be filled in before it is emitted.)
                                     */
                                    goto breakTagopenloop;
                                    // goto continueStateloop;
                                }
                                switch (c)
                                {
                                    case '!':
                                        /*
                                         * U+0021 EXCLAMATION MARK (!) Switch to the
                                         * markup declaration open state.
                                         */
                                        //state = Transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
                                        state = TokenizerState.s45_MARKUP_DECLARATION_OPEN;
                                        goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the close tag
                                         * open state.
                                         */
                                        //state = Transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
                                        state = TokenizerState.s09_CLOSE_TAG_OPEN;
                                        goto continueStateloop;
                                    case '?':
                                        /*
                                         * U+003F QUESTION MARK (?) Parse error.
                                         */
                                        ErrProcessingInstruction();
                                        /*
                                         * Switch to the bogus comment state.
                                         */
                                        ClearLongStrBufAndAppend(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrLtGt();
                                        /*
                                         * Emit a U+003C LESS-THAN SIGN character token
                                         * and a U+003E GREATER-THAN SIGN character
                                         * token.
                                         */
                                        TokenListener.Characters(LT_GT, 0, 2);
                                        /* Switch to the data state. */
                                        //cstart = pos + 1;
                                        reader.SkipOneAndStartCollect();
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Parse error.
                                         */
                                        ErrBadCharAfterLt(c);
                                        /*
                                         * Emit a U+003C LESS-THAN SIGN character token
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        /*
                                         * and reconsume the current input character in
                                         * the data state.
                                         */
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakTagopenloop:
                            goto case TokenizerState.s10_TAG_NAME;
                        }
                    //  FALL THROUGH DON'T REORDER
                    case TokenizerState.s10_TAG_NAME:
                        /*tagnameloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        StrBufToElementNameString();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before attribute name state.
                                         */
                                        StrBufToElementNameString();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakTagnameloop;
                                    // goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        StrBufToElementNameString();
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        StrBufToElementNameString();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        goto default;
                                    // fall thru
                                    default:
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Append the
                                             * lowercase TokenizerState.version of the current input
                                             * character (add 0x0020 to the character's
                                             * code point) to the current tag token's
                                             * tag name.
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Anything else Append the current input
                                         * character to the current tag token's tag
                                         * name.
                                         */
                                        AppendStrBuf(c);
                                        /*
                                         * Stay in the tag name state.
                                         */
                                        continue;
                                }
                            }

                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakTagnameloop:
                            goto case TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    // Before attribute name state.
                    // Skips whitespace inside a tag, then either hands off to the self-closing
                    // state ('/'), emits the tag ('>'), or seeds the string buffer with the first
                    // (ASCII-lower-cased) character of a new attribute name and continues
                    // directly into the attribute name state below.
                    case TokenizerState.s34_BEFORE_ATTRIBUTE_NAME:
                        /*beforeattributenameloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // CR: leave the state loop with the state unchanged.
                                        // NOTE(review): presumably the CR/LF bookkeeping is finished
                                        // on re-entry - confirm against SilentCarriageReturn.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before attribute name state.
                                         */
                                        continue;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;

                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before the error is reported,
                                        // so the error callback receives the replacement character.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto case '\"';
                                    case '\"':
                                    case '\'':
                                    case '<':
                                    case '=':
                                        /*
                                         * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                         * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                         * SIGN (=) Parse error.
                                         */
                                        ErrBadCharBeforeAttributeNameOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Start a new attribute in the
                                         * current tag token.
                                         */
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Set that
                                             * attribute's name to the lowercase version
                                             * of the current input character (add
                                             * 0x0020 to the character's code point)
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Set that attribute's name to the current
                                         * input character,
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * and its value to the empty string.
                                         */
                                        // Will do later.
                                        /*
                                         * Switch to the attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s35_ATTRIBUTE_NAME;
                                        // Exit the read loop via the label below, which jumps
                                        // straight to the attribute name case.
                                        goto breakBeforeattributenameloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop, keeping the current state.
                            goto breakStateloop;
                        //------------------------------------
                        breakBeforeattributenameloop:
                            goto case TokenizerState.s35_ATTRIBUTE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    // Attribute name state.
                    // Accumulates (ASCII-lower-cased) attribute name characters in the string
                    // buffer until whitespace, '/', '=' or '>' ends the name. Every terminating
                    // branch calls AttributeNameComplete() first; '/' and '>' additionally
                    // record the attribute as value-less.
                    case TokenizerState.s35_ATTRIBUTE_NAME:
                        /*attributenameloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        // CR ends the name like other whitespace, but leaves the
                                        // state loop instead of continuing it.
                                        SilentCarriageReturn();
                                        AttributeNameComplete();
                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s36_AFTER_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the after attribute name state.
                                         */
                                        AttributeNameComplete();
                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s36_AFTER_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        AttributeNameComplete();
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        goto continueStateloop;
                                    case '=':
                                        /*
                                         * U+003D EQUALS SIGN (=) Switch to the before
                                         * attribute value state.
                                         */
                                        AttributeNameComplete();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
                                        state = TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE;
                                        // Exit the read loop via the label below, which jumps
                                        // straight to the before attribute value case.
                                        goto breakAttributenameloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        AttributeNameComplete();
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before the error is reported
                                        // and before the character is appended to the name.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto case '\"';
                                    case '\"':
                                    case '\'':
                                    case '<':
                                        /*
                                         * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                         * (') U+003C LESS-THAN SIGN (<) Parse error.
                                         */
                                        ErrQuoteOrLtInAttributeNameOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Append the
                                             * lowercase version of the current input
                                             * character (add 0x0020 to the character's
                                             * code point) to the current attribute's
                                             * name.
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's name.
                                         */
                                        AppendStrBuf(c);
                                        /*
                                         * Stay in the attribute name state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop, keeping the current state.
                            goto breakStateloop;
                        //------------------------------------
                        breakAttributenameloop:
                            goto case TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE;
                        }
                    // FALLTHRU DON'T REORDER
                    // Before attribute value state.
                    // Skips whitespace after '=', then picks the value flavour: double-quoted
                    // (continues directly into the case below), single-quoted, or unquoted.
                    // A '>' here reports a missing value and emits the tag with a value-less
                    // attribute.
                    case TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE:
                        /*beforeattributevalueloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // CR: leave the state loop with the state unchanged.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before attribute value state.
                                         */
                                        continue;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Switch to the
                                         * attribute value (double-quoted) state.
                                         */
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s38_ATTRIBUTE_VALUE_DOUBLE_QUOTED;

                                        // Exit the read loop via the label below, which jumps
                                        // straight to the double-quoted value case.
                                        goto breakBeforeattributevalueloop;
                                    // goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the attribute
                                         * value (unquoted) state and reconsume this
                                         * input character.
                                         */
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
                                        state = TokenizerState.s40_ATTRIBUTE_VALUE_UNQUOTED;
                                        NoteUnquotedAttributeValue();
                                        //reconsume = true;
                                        // Reconsume is implemented by stepping the reader back so
                                        // the unquoted state reads the '&' again.
                                        reader.StepBack();
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the attribute
                                         * value (single-quoted) state.
                                         */
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s39_ATTRIBUTE_VALUE_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrAttributeValueMissing();
                                        /*
                                         * Emit the current tag token.
                                         */
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before the error is reported
                                        // and before the character becomes the first value char.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto case '<';
                                    case '<':
                                    case '=':
                                    case '`':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
                                         * (=) U+0060 GRAVE ACCENT (`)
                                         */
                                        ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        // [NOCPP[
                                        ErrHtml4NonNameInUnquotedAttribute(c);
                                        // ]NOCPP]
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        // The character is consumed here (no StepBack): it becomes
                                        // the first character of the unquoted value.
                                        ClearLongStrBufAndAppend(c);
                                        /*
                                         * Switch to the attribute value (unquoted)
                                         * state.
                                         */

                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
                                        state = TokenizerState.s40_ATTRIBUTE_VALUE_UNQUOTED;

                                        NoteUnquotedAttributeValue();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop, keeping the current state.
                            goto breakStateloop;
                        //------------------------------------
                        breakBeforeattributevalueloop:
                            goto case TokenizerState.s38_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
                        }

                    // FALLTHRU DON'T REORDER
                    // Attribute value (double-quoted) state.
                    // Appends characters to the long string buffer until the closing '"',
                    // at which point the attribute is added with its value and control
                    // continues directly into the after-attribute-value (quoted) case below.
                    case TokenizerState.s38_ATTRIBUTE_VALUE_DOUBLE_QUOTED:
                        /*attributevaluedoublequotedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Switch to the after
                                         * attribute value (quoted) state.
                                         */
                                        AddAttributeWithValue();

                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED;
                                        goto breakAttributevaluedoublequotedloop;
                                    // goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in attribute value state, with the
                                         * additional allowed character being U+0022
                                         * QUOTATION MARK (").
                                         */
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('\"');
                                        // Save this state before switching.
                                        // NOTE(review): presumably the character-reference states
                                        // use returnState to resume here - confirm.
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

                                        goto continueStateloop;
                                    case '\r':
                                        // CR is recorded in the value, then the state loop is left
                                        // with the state unchanged.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // NUL is stored in the value as U+FFFD.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the attribute value (double-quoted)
                                         * state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop, keeping the current state.
                            goto breakStateloop;
                        //------------------------------------
                        breakAttributevaluedoublequotedloop:
                            goto case TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    // After attribute value (quoted) state.
                    // Entered right after a closing quote. Whitespace returns to the before
                    // attribute name state, '/' continues directly into the self-closing case
                    // below, '>' emits the tag, and anything else is reported as a missing
                    // space between attributes and then re-read as the start of a new attribute.
                    case TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED:
                        /*afterattributevaluequotedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // CR behaves like other whitespace for the state change,
                                        // but leaves the state loop instead of continuing it.
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        goto breakAfterattributevaluequotedloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Parse error.
                                         */
                                        ErrNoSpaceBetweenAttributes();
                                        /*
                                         * Reconsume the character in the before
                                         * attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        //reconsume = true;
                                        // Reconsume is implemented by stepping the reader back.
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop, keeping the current state.
                            goto breakStateloop;
                        //------------------------------------
                        breakAfterattributevaluequotedloop:
                            goto case TokenizerState.s43_SELF_CLOSING_START_TAG;
                        }
                    // FALLTHRU DON'T REORDER
                    // Self-closing start tag state.
                    // Reads exactly one character after '/': a '>' emits the current tag via
                    // EmitCurrentTagToken(true); any other character is a parse error and is
                    // re-read in the before attribute name state.
                    case TokenizerState.s43_SELF_CLOSING_START_TAG:
                        {
                            char c;
                            // Unlike the looping states above, this state reads a single
                            // character; if none is available it leaves the state loop.
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;
                            }
                            //---------------------------------
                            /*
                             * Consume the next input character:
                             */
                            switch (c)
                            {
                                case '>':
                                    /*
                                     * U+003E GREATER-THAN SIGN (>) Set the self-closing
                                     * flag of the current tag token. Emit the current
                                     * tag token.
                                     */
                                    // [NOCPP[
                                    ErrHtml4XmlVoidSyntax();
                                    // ]NOCPP]
                                    //state = Transition(state, EmitCurrentTagToken(true, pos), reconsume, pos);
                                    // The next state is whatever EmitCurrentTagToken returns.
                                    state = EmitCurrentTagToken(true);
                                    if (shouldSuspend)
                                    {
                                        goto breakStateloop;
                                    }
                                    /*
                                     * Switch to the data state.
                                     */
                                    goto continueStateloop;
                                default:
                                    /* Anything else Parse error. */
                                    ErrSlashNotFollowedByGt();
                                    /*
                                     * Reconsume the character in the before attribute
                                     * name state.
                                     */
                                    //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                    state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                    // Reconsume is implemented by stepping the reader back.
                                    reader.StepBack();
                                    //reconsume = true;
                                    goto continueStateloop;
                            }
                        }
                    // XXX reorder point
                    case TokenizerState.s40_ATTRIBUTE_VALUE_UNQUOTED:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        AddAttributeWithValue();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before attribute name state.
                                         */
                                        AddAttributeWithValue();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in attribute value state, with the
                                         * additional allowed character being U+003E
                                         * GREATER-THAN SIGN (>)
                                         */
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('>');
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        AddAttributeWithValue();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        goto case '<';
                                    // fall thru
                                    case '<':
                                    case '\"':
                                    case '\'':
                                    case '=':
                                    case '`':
                                        /*
                                         * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                         * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                         * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
                                         */
                                        ErrUnquotedAttributeValOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        // fall through
                                        goto default;
                                    default:
                                        // [NOCPP]
                                        ErrHtml4NonNameInUnquotedAttribute(c);
                                        // ]NOCPP]
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the attribute value (unquoted) state.
                                         */
                                        continue;
                                }
                            }
                            //-------------------------------
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s36_AFTER_ATTRIBUTE_NAME:
                        {
                            // After-attribute-name state: reads characters one at a time and
                            // decides what follows the attribute name collected so far.
                            // Apart from '=' (a value is coming) and '\r', every branch that
                            // leaves this state first calls AddAttributeWithoutValue().
                            // NOTE(review): breakStateloop / continueStateloop are labels declared
                            // outside this section; presumably they leave / re-enter the
                            // enclosing state loop - confirm against the surrounding method.
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\r':
                                        // CR is handed to SilentCarriageReturn() and the state
                                        // variable is left unchanged before jumping out.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the after attribute name state.
                                         */
                                        continue;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        goto continueStateloop;
                                    case '=':
                                        /*
                                         * U+003D EQUALS SIGN (=) Switch to the before
                                         * attribute value state.
                                         */
                                        // No attribute is added here: the value has not been read yet.
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
                                        state = TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        AddAttributeWithoutValue();
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD and then handled by the
                                        // quote/'<' error branch below (which therefore
                                        // receives U+FFFD, not NUL).
                                        c = '\uFFFD';
                                        goto case '\"';
                                    // explicit jump above stands in for fall-through
                                    case '\"':
                                    case '\'':
                                    case '<':
                                        ErrQuoteOrLtInAttributeNameOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        // Close out the previous (value-less) attribute before
                                        // the current character starts a new attribute name.
                                        AddAttributeWithoutValue();
                                        /*
                                         * Anything else Start a new attribute in the
                                         * current tag token.
                                         */
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Set that
                                             * attribute's name to the lowercase version
                                             * of the current input character (add
                                             * 0x0020 to the character's code point)
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Set that attribute's name to the current
                                         * input character,
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * and its value to the empty string.
                                         */
                                        // Will do later.
                                        /*
                                         * Switch to the attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s35_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            // eof: ReadNext returned false, so no more input is available right now
                            goto breakStateloop;
                        }

                    // XXX reorder point
                    case TokenizerState.s39_ATTRIBUTE_VALUE_SINGLE_QUOTED:
                        /*attributevaluesinglequotedloop:*/
                        {
                            // Attribute value (single-quoted) state: appends characters to the
                            // attribute value buffer until the closing apostrophe, handing '&'
                            // over to CONSUME_CHARACTER_REFERENCE with this state recorded in
                            // returnState.
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the after
                                         * attribute value (quoted) state.
                                         */
                                        AddAttributeWithValue();

                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED;
                                        goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in attribute value state, with the
                                         * additional allowed character being U+0027
                                         * APOSTROPHE (').
                                         */
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('\'');
                                        // Remember this state so the character-reference states
                                        // can assign it back to `state` when they finish.
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
                                        // Jump straight to the next case section (via the label
                                        // below) instead of going back through the state loop.
                                        goto breakAttributevaluesinglequotedloop;
                                    // goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before being appended.
                                        c = '\uFFFD';
                                        goto default;
                                    // explicit jump above stands in for fall-through
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the attribute value (single-quoted)
                                         * state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so no more input is available right now
                            goto breakStateloop;
                        //------------------------------------
                        // C# has no implicit fall-through between switch sections, so the
                        // '&' branch lands here and jumps explicitly to the next state.
                        breakAttributevaluesinglequotedloop:
                            goto case TokenizerState.CONSUME_CHARACTER_REFERENCE;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.CONSUME_CHARACTER_REFERENCE:
                        {
                            // Inspects the single character that follows '&' and decides whether
                            // this is not a reference at all (hand back to returnState), a
                            // numeric reference (CONSUME_NCR), or possibly a named reference
                            // (CHARACTER_REFERENCE_HILO_LOOKUP).
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //------------------------------------
                                //eof
                                goto breakStateloop;
                            }
                            if (c == '\u0000')
                            {
                                goto breakStateloop;
                            }
                            /*
                             * Unlike the definition in the spec, this state does not
                             * return a value and never requires the caller to
                             * backtrack. This state takes care of emitting characters
                             * or appending to the current attribute value. It also
                             * takes care of that in the case when consuming the
                             * character reference fails.
                             */
                            /*
                             * This section defines how to consume a character
                             * reference. This definition is used when parsing character
                             * references in text and in attributes.
                             * 
                             * The behavior depends on the identity of the next
                             * character (the one immediately after the U+0026 AMPERSAND
                             * character):
                             */
                            switch (c)
                            {
                                case ' ':
                                case '\t':
                                case '\n':
                                case '\r': // we'll reconsume!
                                case '\u000C':
                                case '<':
                                case '&':
                                    // Not a character reference: flush what was buffered and
                                    // hand the current character back to the return state.
                                    EmitOrAppendStrBuf(returnState);
                                    // NOTE(review): the active test has the opposite sense of
                                    // the commented-out original below; presumably the mask or
                                    // enum values were redefined in this port - confirm.
                                    //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                    {
                                        reader.StartCollect();
                                    }
                                    //state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
                                    // StepBack() takes the place of the original "reconsume"
                                    // flag; presumably it un-reads c - confirm in the reader.
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto continueStateloop;
                                case '#':
                                    /*
                                     * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
                                     * SIGN.
                                     */
                                    AppendStrBuf('#');
                                    //state = Transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
                                    state = TokenizerState.CONSUME_NCR;
                                    goto continueStateloop;
                                default:
                                    // The "additional allowed character" set by the calling
                                    // state ends the reference attempt; unlike the branches
                                    // above and below, StartCollect() is not called here.
                                    if (c == additional)
                                    {
                                        EmitOrAppendStrBuf(returnState);
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    // firstCharKey packs the first letter of the name:
                                    // 'A'-'Z' map to 0..25 and 'a'-'z' map to 26..51.
                                    if (c >= 'a' && c <= 'z')
                                    {
                                        firstCharKey = c - 'a' + 26;
                                    }
                                    else if (c >= 'A' && c <= 'Z')
                                    {
                                        firstCharKey = c - 'A';
                                    }
                                    else
                                    {
                                        // No match
                                        /*
                                         * If no match can be made, then this is a parse
                                         * error.
                                         */
                                        ErrNoNamedCharacterMatch();
                                        EmitOrAppendStrBuf(returnState);
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.StartCollect();
                                        }
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    // Didn't fail yet
                                    AppendStrBuf(c);
                                    //state = Transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
                                    state = TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;

                                    // FALL THROUGH goto continueStateloop;
                                    break;
                            }
                            //------------------------------------
                            // Only the "first letter accepted" path reaches this point; every
                            // other branch of the switch above has already jumped away.
                            goto case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;
                        }
                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                    case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP:
                        {
                            // Looks at the second character of a potential named character
                            // reference. Together with firstCharKey (set from the first letter
                            // by CONSUME_CHARACTER_REFERENCE) it selects the [lo, hi] range of
                            // candidate names that CHARACTER_REFERENCE_TAIL then narrows down.
                            char c;
                            // ReadNext returns false when no further character is available,
                            // so the early exit must be taken on a *failed* read, as in the
                            // other states of this loop.
                            if (!reader.ReadNext(out c))
                            {
                                //------------------------------------
                                //eof
                                goto breakStateloop;
                            }

                            if (c == '\u0000')
                            {
                                goto breakStateloop;
                            }
                            /*
                             * The data structure is as follows:
                             * 
                             * HILO_ACCEL is a two-dimensional int array whose major
                             * index corresponds to the second character of the
                             * character reference (code point as index) and the
                             * minor index corresponds to the first character of the
                             * character reference (packed so that A-Z runs from 0
                             * to 25 and a-z runs from 26 to 51). This layout makes
                             * it easier to use the sparseness of the data structure
                             * to omit parts of it: The second dimension of the
                             * table is null when no character reference starts with
                             * the character corresponding to that row.
                             * 
                             * The int value HILO_ACCEL (by these indices) is zero
                             * if there exists no character reference starting with
                             * that two-letter prefix. Otherwise, the value is an
                             * int that packs two shorts so that the higher short is
                             * the index of the highest character reference name
                             * with that prefix in NAMES and the lower short
                             * corresponds to the index of the lowest character
                             * reference name with that prefix. (It happens that the
                             * first two character reference names share their
                             * prefix so the packed int cannot be 0 by packing the
                             * two shorts.)
                             * 
                             * NAMES is an array of byte arrays where each byte
                             * array encodes the name of a character references as
                             * ASCII. The names omit the first two letters of the
                             * name. (Since storing the first two letters would be
                             * redundant with the data contained in HILO_ACCEL.) The
                             * entries are lexically sorted.
                             * 
                             * For a given index in NAMES, the same index in VALUES
                             * contains the corresponding expansion as an array of
                             * two UTF-16 code units (either the character and
                             * U+0000 or a surrogate pair).
                             */
                            int hilo = 0;
                            // Characters above 'z' are never used as a row index.
                            if (c <= 'z')
                            {
                                int[] row = NamedCharactersAccel.HILO_ACCEL[c];
                                if (row != null)
                                {
                                    hilo = row[firstCharKey];
                                }
                            }
                            if (hilo == 0)
                            {
                                /*
                                 * If no match can be made, then this is a parse
                                 * error.
                                 */
                                ErrNoNamedCharacterMatch();
                                EmitOrAppendStrBuf(returnState);
                                // NOTE(review): the active test has the opposite sense of the
                                // commented-out original below; presumably the mask or enum
                                // values were redefined in this port - confirm.
                                //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                {
                                    reader.StartCollect();
                                }
                                //state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
                                // StepBack() takes the place of the original "reconsume" flag.
                                //reconsume = true;
                                reader.StepBack();
                                goto continueStateloop;
                            }
                            // Didn't fail yet
                            AppendStrBuf(c);
                            // Unpack the candidate range: low 16 bits = lowest NAMES index,
                            // high 16 bits = highest NAMES index for this two-letter prefix.
                            lo = hilo & 0xFFFF;
                            hi = hilo >> 16;
                            // Reset the per-reference matching state used by the tail state.
                            entCol = -1;
                            candidate = -1;
                            strBufMark = 0;
                            //state = Transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
                            state = TokenizerState.CHARACTER_REFERENCE_TAIL;
                            // FALL THROUGH goto continueStateloop;
                            goto case TokenizerState.CHARACTER_REFERENCE_TAIL;
                        }
                    case TokenizerState.CHARACTER_REFERENCE_TAIL:
                        /*outer:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                if (c == '\u0000')
                                {
                                    goto breakStateloop;
                                }
                                entCol++;
                                /*
                                 * Consume the maximum number of characters possible,
                                 * with the consumed characters matching one of the
                                 * identifiers in the first column of the named
                                 * character references table (in a case-sensitive
                                 * manner).
                                 */
                                /*loloop:*/
                                for (; ; )
                                {
                                    if (hi < lo)
                                    {
                                        goto breakOuter;
                                    }
                                    if (entCol == NamedCharacters.NAMES[lo].Length)
                                    {
                                        candidate = lo;
                                        strBufMark = this.strBuffer.Length;
                                        lo++;
                                    }
                                    else if (entCol > NamedCharacters.NAMES[lo].Length)
                                    {
                                        goto breakOuter;
                                    }
                                    else if (c > NamedCharacters.NAMES[lo][entCol])
                                    {
                                        lo++;
                                    }
                                    else
                                    {
                                        goto breakLoloop;
                                    }
                                }
                            breakLoloop:

                                /*hiloop:*/
                                for (; ; )
                                {
                                    if (hi < lo)
                                    {
                                        goto breakOuter;
                                    }
                                    if (entCol == NamedCharacters.NAMES[hi].Length)
                                    {
                                        goto breakHiloop;
                                    }
                                    if (entCol > NamedCharacters.NAMES[hi].Length)
                                    {
                                        goto breakOuter;
                                    }
                                    else if (c < NamedCharacters.NAMES[hi][entCol])
                                    {
                                        hi--;
                                    }
                                    else
                                    {
                                        goto breakHiloop;
                                    }
                                }

                            breakHiloop:

                                if (hi < lo)
                                {
                                    goto breakOuter;
                                }
                                AppendStrBuf(c);
                                continue;
                            }

                        breakOuter:

                            if (candidate == -1)
                            {
                                // reconsume deals with CR, LF or nul
                                /*
                                 * If no match can be made, then this is a parse error.
                                 */
                                ErrNoNamedCharacterMatch();
                                EmitOrAppendStrBuf(returnState);
                                //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                {
                                    reader.StartCollect();
                                }
                                //state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
                                //reconsume = true;
                                reader.StepBack();
                                goto continueStateloop;
                            }
                            else
                            {
                                // c can't be CR, LF or nul if we got here
                                string candidateName = NamedCharacters.NAMES[candidate];
                                if (candidateName.Length == 0
                                        || candidateName[candidateName.Length - 1] != ';')
                                {
                                    /*
                                     * If the last character matched is not a U+003B
                                     * SEMICOLON (;), there is a parse error.
                                     */
                                    //if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
                                    {
                                        /*
                                         * If the entity is being consumed as part of an
                                         * attribute, and the last character matched is
                                         * not a U+003B SEMICOLON (;),
                                         */
                                        char ch;
                                        if (strBufMark == this.strBuffer.Length)
                                        {
                                            ch = c;
                                        }
                                        else
                                        {
                                            // if (strBufOffset != -1) {
                                            // ch = buf[strBufOffset + strBufMark];
                                            // } else {
                                            ch = this.strBuffer[strBufMark];
                                            // }
                                        }
                                        if (ch == '=' || (ch >= '0' && ch <= '9')
                                                || (ch >= 'A' && ch <= 'Z')
                                                || (ch >= 'a' && ch <= 'z'))
                                        {
                                            /*
                                             * and the next character is either a U+003D
                                             * EQUALS SIGN character (=) or in the range
                                             * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
                                             * U+0041 LATIN CAPITAL LETTER A to U+005A
                                             * LATIN CAPITAL LETTER Z, or U+0061 LATIN
                                             * SMALL LETTER A to U+007A LATIN SMALL
                                             * LETTER Z, then, for historical reasons,
                                             * all the characters that were matched
                                             * after the U+0026 AMPERSAND (&) must be
                                             * unconsumed, and nothing is returned.
                                             */
                                            ErrNoNamedCharacterMatch();
                                            AppendStrBufToLongStrBuf();
                                            //state = Transition(state, returnState, reconsume, pos);
                                            state = returnState;
                                            //reconsume = true;
                                            reader.StepBack();
                                            goto continueStateloop;
                                        }
                                    }
                                    //if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
                                    {
                                        ErrUnescapedAmpersandInterpretedAsCharacterReference();
                                    }
                                    else
                                    {
                                        ErrNotSemicolonTerminated();
                                    }
                                }

                                /*
                                 * Otherwise, return a character token for the character
                                 * corresponding to the entity name (as given by the
                                 * second column of the named character references
                                 * table).
                                 */

                                char[] val = NamedCharacters.VALUES[candidate];
                                if (val.Length == 1)
                                {
                                    EmitOrAppendOne(val, returnState);
                                }
                                else
                                {
                                    EmitOrAppendTwo(val, returnState);
                                }
                                // this is so complicated!
                                if (strBufMark < this.strBuffer.Length)
                                {
                                    // if (strBufOffset != -1) {
                                    // if ((returnState & (~1)) != 0) {
                                    // for (int i = strBufMark; i < strBufLen; i++) {
                                    // appendLongStrBuf(buf[strBufOffset + i]);
                                    // }
                                    // } else {
                                    // tokenHandler.Characters(buf, strBufOffset
                                    // + strBufMark, strBufLen
                                    // - strBufMark);
                                    // }
                                    // } else {
                                    //if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
                                    {
                                        int j = this.strBuffer.Length;
                                        for (int i = strBufMark; i < j; i++)
                                        {
                                            AppendLongStrBuf(strBuffer[i]);
                                        }
                                    }
                                    else
                                    {
                                        TokenListener.Characters(CopyFromStringBuiler(this.strBuffer, strBufMark, this.strBuffer.Length - strBufMark));
                                    }
                                    // }
                                }
                                //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                {
                                    reader.StartCollect();
                                }
                                //state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
                                //reconsume = true;
                                reader.StepBack();
                                goto continueStateloop;
                                /*
                                 * If the markup contains I'm &notit; I tell you, the
                                 * entity is parsed as "not", as in, I'm ¬it; I tell
                                 * you. But if the markup was I'm &notin; I tell you,
                                 * the entity would be parsed as "notin;", resulting in
                                 * I'm ∉ I tell you.
                                 */
                            }

                        }
                    // XXX reorder point
                    case TokenizerState.s09_CLOSE_TAG_OPEN:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //------------------------------------
                                //eof
                                goto breakStateloop;
                            }

                            /*
                             * Otherwise, if the content model flag is set to the PCDATA
                             * state, or if the next few characters do match that tag
                             * name, consume the next input character:
                             */
                            switch (c)
                            {
                                case '>':
                                    /* U+003E GREATER-THAN SIGN (>) Parse error. */
                                    ErrLtSlashGt();
                                    /*
                                     * Switch to the data state.
                                     */
                                    reader.SkipOneAndStartCollect();
                                    //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                case '\r':
                                    SilentCarriageReturn();
                                    /* Anything else Parse error. */
                                    ErrGarbageAfterLtSlash();
                                    /*
                                     * Switch to the bogus comment state.
                                     */
                                    ClearLongStrBufAndAppend('\n');
                                    //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.s44_BOGUS_COMMENT;
                                    goto breakStateloop;
                                case '\n':
                                    /* Anything else Parse error. */
                                    ErrGarbageAfterLtSlash();
                                    /*
                                     * Switch to the bogus comment state.
                                     */
                                    ClearLongStrBufAndAppend('\n');
                                    //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.s44_BOGUS_COMMENT;
                                    goto continueStateloop;
                                case '\u0000':
                                    c = '\uFFFD';
                                    // fall thru
                                    goto default;
                                default:
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        c += (char)0x20;
                                    }
                                    if (c >= 'a' && c <= 'z')
                                    {
                                        /*
                                         * U+0061 LATIN SMALL LETTER A through to U+007A
                                         * LATIN SMALL LETTER Z Create a new end tag
                                         * token,
                                         */
                                        endTag = true;
                                        /*
                                         * set its tag name to the input character,
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * then switch to the tag name state. (Don't
                                         * emit the token yet; further details will be
                                         * filled in before it is emitted.)
                                         */
                                        //state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                        state = TokenizerState.s10_TAG_NAME;
                                        goto continueStateloop;
                                    }
                                    else
                                    {
                                        /* Anything else Parse error. */
                                        ErrGarbageAfterLtSlash();
                                        /*
                                         * Switch to the bogus comment state.
                                         */
                                        ClearLongStrBufAndAppend(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                    }
                            }
                        }  
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

        breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }
Example #53
0
        /// <summary>
        /// Closes out this element. Any literal text still pending (as reported by TryGetLiteral)
        /// is yielded first. If this iterator has a parent, the parent's element is yielded only
        /// when that parent has no parent of its own (it sits at the top of the hierarchy); the
        /// parent is then reset to the current position and this iterator is marked as finished.
        /// Without a parent, nothing beyond the pending literal is produced.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory to operate against.
        /// </param>
        ///
        /// <returns>
        /// A lazily evaluated sequence of the DOM objects that became available by closing this
        /// element; it may be empty.
        /// </returns>

        public IEnumerable<IDomObject> CloseElement(HtmlElementFactory factory)
        {
            // Flush text collected since the last tag, if there is any.
            IDomObject pendingLiteral;
            bool hasLiteral = TryGetLiteral(factory, out pendingLiteral);
            if (hasLiteral)
            {
                yield return pendingLiteral;
            }

            // No parent: there is nothing to hand back or reset.
            if (Parent == null)
            {
                yield break;
            }

            // Only a parent without a parent of its own is yielded here.
            bool parentIsTopLevel = Parent.Parent == null;
            if (parentIsTopLevel)
            {
                yield return Parent.Element;
            }

            Parent.Reset(Pos);
            TokenizerState = TokenizerState.Finished;
        }
Example #54
0
        /// <summary>
        /// Splits <paramref name="sourceCode"/> into tokens using a character-driven state machine.
        /// The token list, the current token and the tokenizer state are reset on every call.
        /// In the default state a character found in SingleCharTokens becomes a token of its own,
        /// a letter starts a word, a digit starts a number, a double quote starts a string and a
        /// single quote starts a comment; any other character is ignored. A word followed by ':'
        /// is emitted as a label. A comment runs up to the next line feed and its text is not
        /// collected.
        /// </summary>
        /// <param name="sourceCode">The text to tokenize.</param>
        /// <returns>The tokens recognised in <paramref name="sourceCode"/>, in source order.</returns>
        /// <exception cref="InvalidOperationException">
        /// The tokenizer state holds a value this method has no handling for.
        /// </exception>
        public List <Token> Tokenize(string sourceCode)
        {
            _tokens                = new List <Token>();
            _currentToken          = new Token(TokenType.Unknown, string.Empty);
            _currentTokenizerState = TokenizerState.Default;

            foreach (var character in sourceCode)
            {
                // A character that terminates a word or a number does not belong to that token,
                // so it is run through the state machine a second time in the new state.
                bool reprocess;
                do
                {
                    reprocess = false;

                    switch (_currentTokenizerState)
                    {
                    case TokenizerState.Default:
                        if (SingleCharTokens.ContainsKey(character))
                        {
                            _tokens.Add(new Token(SingleCharTokens[character], character.ToString()));
                        }
                        else if (char.IsLetter(character))
                        {
                            StartToken(TokenizerState.Word, character.ToString());
                        }
                        else if (char.IsDigit(character))
                        {
                            StartToken(TokenizerState.Number, character.ToString());
                        }
                        else if (character.Equals('"'))
                        {
                            StartToken(TokenizerState.String);
                        }
                        else if (character.Equals('\''))
                        {
                            StartToken(TokenizerState.Comment);
                        }
                        break;

                    case TokenizerState.Word:
                        if (char.IsLetterOrDigit(character))
                        {
                            _currentToken.Text += character;
                        }
                        else if (character.Equals(':'))
                        {
                            // The colon is consumed: it turns the word into a label.
                            EndToken(TokenType.Label);
                        }
                        else
                        {
                            EndToken(TokenType.Word);
                            reprocess = true;
                        }
                        break;

                    case TokenizerState.Number:     // ToDo: support negative numbers and floating point numbers.
                        if (char.IsDigit(character))
                        {
                            _currentToken.Text += character;
                        }
                        else
                        {
                            EndToken(TokenType.Number);
                            reprocess = true;
                        }
                        break;

                    case TokenizerState.String:
                        if (character.Equals('"'))
                        {
                            EndToken(TokenType.String);
                        }
                        else
                        {
                            _currentToken.Text += character;
                        }
                        break;

                    case TokenizerState.Comment:
                        if (character.Equals('\n'))
                        {
                            EndToken(TokenType.Comment);
                        }
                        break;

                    default:
                        throw new InvalidOperationException("Unknown tokenizer state reached.");
                    }
                }
                while (reprocess);
            }

            // Input that ends in the middle of a word, number or comment used to lose that last
            // token because nothing terminated it. Close it here instead.
            // NOTE(review): this assumes EndToken appends the current token and leaves the
            // Word/Number/Comment state, which the reprocessing path above already depends on.
            // An unterminated string literal is left as before (not emitted) - decide separately
            // whether that should be reported as an error.
            switch (_currentTokenizerState)
            {
            case TokenizerState.Word:
                EndToken(TokenType.Word);
                break;

            case TokenizerState.Number:
                EndToken(TokenType.Number);
                break;

            case TokenizerState.Comment:
                EndToken(TokenType.Comment);
                break;
            }

            return(_tokens);
        }
Example #55
0
        /// <summary>
        /// Consumes the remaining text from the front and appends the resulting tokens to the
        /// token list, alternating between two states. While reading text, everything captured
        /// by the "text" group of the raw-text pattern becomes a RawText token (when non-empty)
        /// followed by an InstructionBegin token; if that pattern does not match, all remaining
        /// text becomes one RawText token and the method returns. While reading an instruction,
        /// the variable, subtemplate and end-subtemplate patterns are tried in that order.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// An instruction was expected but none of the instruction patterns matched.
        /// </exception>
        public void Tokenize()
        {
            while (_remainingText != string.Empty)
            {
                if (_state == TokenizerState.ReadingText)
                {
                    var literalMatch = _rawTextRegex.Match(_remainingText);
                    if (!literalMatch.Success)
                    {
                        // The raw-text pattern did not match: the rest is emitted as plain text.
                        _tokens.Add(new Token(TokenType.RawText, _remainingText));
                        _remainingText = string.Empty;
                        return;
                    }

                    string leadingText = literalMatch.Groups["text"].Value;
                    if (leadingText.Length > 0)
                    {
                        _tokens.Add(new Token(TokenType.RawText, leadingText));
                    }

                    _tokens.Add(new Token(TokenType.InstructionBegin, "<$"));
                    _remainingText = _remainingText.Substring(literalMatch.Length);
                    _state         = TokenizerState.ReadingInstruction;
                }
                else if (_state == TokenizerState.ReadingInstruction)
                {
                    // Variable instruction.
                    var variableMatch = _varInstructionRegex.Match(_remainingText);
                    if (variableMatch.Success)
                    {
                        _tokens.Add(new Token(TokenType.Variable, "var:"));
                        _tokens.Add(new Token(TokenType.Identifier, variableMatch.Groups["name"].Value));
                        _tokens.Add(new Token(TokenType.InstructionEnd, ">"));

                        _remainingText = _remainingText.Substring(variableMatch.Length);
                        _state         = TokenizerState.ReadingText;
                        continue;
                    }

                    // Subtemplate opening instruction.
                    var subtemplateMatch = _subtemplateInstructionRegex.Match(_remainingText);
                    if (subtemplateMatch.Success)
                    {
                        _tokens.Add(new Token(TokenType.SubtemplateBegin, "subtemplate:"));
                        _tokens.Add(new Token(TokenType.Identifier, subtemplateMatch.Groups["name"].Value));
                        _tokens.Add(new Token(TokenType.InstructionEnd, ">"));

                        _remainingText = _remainingText.Substring(subtemplateMatch.Length);
                        _state         = TokenizerState.ReadingText;
                        continue;
                    }

                    // Subtemplate closing instruction.
                    var endSubtemplateMatch = _endSubtemplateInstructionRegex.Match(_remainingText);
                    if (endSubtemplateMatch.Success)
                    {
                        _tokens.Add(new Token(TokenType.SubtemplateEnd, "endsubtemplate"));
                        _tokens.Add(new Token(TokenType.InstructionEnd, ">"));

                        _remainingText = _remainingText.Substring(endSubtemplateMatch.Length);
                        _state         = TokenizerState.ReadingText;
                        continue;
                    }

                    throw new InvalidOperationException("Unable to understand the remaining text while parsing it, because some '<$...>' instruction was expected. " +
                                                        $"Remaining text began with this instead: '{string.Concat(_remainingText.Take(20))}'.");
                }
            }
        }
 /// <summary>
 /// Builds an action that pushes <paramref name="state"/> onto the state stack it is given.
 /// </summary>
 /// <param name="state">The state the returned action pushes.</param>
 /// <returns>An action that pushes <paramref name="state"/> onto its stack argument.</returns>
 internal static Action<Stack<TokenizerState>> PushState(TokenizerState state)
 {
     Action<Stack<TokenizerState>> pushAction = stack =>
     {
         stack.Push(state);
     };
     return pushAction;
 }
            /// <summary>
            /// Lazily splits the original string into tokens. Outside of any token, white space is
            /// skipped and a comma is returned as a token of its own. A single quote starts a quoted
            /// section that runs to the next unescaped single quote (a backslash, or a quote directly
            /// followed by another quote, escapes the following character). '(' or '[' starts a
            /// bracketed section that runs until the matching nesting depth returns to zero; both
            /// bracket kinds are counted together. Any other character starts a plain token, which
            /// ends at white space or a comma. Text still buffered at the end of the input is
            /// returned as a final token.
            /// </summary>
            /// <returns>An enumerator over the tokens of the original string.</returns>
            IEnumerator <string> IEnumerable <string> .GetEnumerator()
            {
                StringBuilder  buffer      = new StringBuilder();
                TokenizerState mode        = TokenizerState.WhiteSpace;
                int            depth       = 0;
                bool           afterEscape = false;

                for (int index = 0; index < original.Length; index++)
                {
                    char current  = original[index];
                    bool isOpener = current == '(' || current == '[';

                    switch (mode)
                    {
                    case TokenizerState.WhiteSpace:
                        if (current == ',')
                        {
                            yield return(",");
                        }
                        else if (!char.IsWhiteSpace(current))
                        {
                            // Every non-blank, non-comma character opens a token; its kind
                            // decides which state collects the rest.
                            buffer.Append(current);
                            if (current == '\'')
                            {
                                mode = TokenizerState.Quoted;
                            }
                            else if (isOpener)
                            {
                                mode  = TokenizerState.InParenthesis;
                                depth = 1;
                            }
                            else
                            {
                                mode = TokenizerState.Token;
                            }
                        }
                        break;

                    case TokenizerState.Quoted:
                        // Every character of a quoted section is kept, including the quotes.
                        buffer.Append(current);
                        if (afterEscape)
                        {
                            afterEscape = false;
                        }
                        // handle escaping of ' by using '' or \'
                        else if (current == '\\' || (current == '\'' && index + 1 < original.Length && original[index + 1] == '\''))
                        {
                            afterEscape = true;
                        }
                        else if (current == '\'')
                        {
                            yield return(buffer.ToString());

                            mode = TokenizerState.WhiteSpace;
                            buffer.Length = 0;
                        }
                        break;

                    case TokenizerState.InParenthesis:
                        // Every character of a bracketed section is kept, including the brackets.
                        buffer.Append(current);
                        if (isOpener)
                        {
                            depth += 1;
                        }
                        else if (current == ')' || current == ']')
                        {
                            depth -= 1;
                            if (depth == 0)
                            {
                                yield return(buffer.ToString());

                                buffer.Length = 0;
                                mode = TokenizerState.WhiteSpace;
                            }
                        }
                        break;

                    case TokenizerState.Token:
                        if (char.IsWhiteSpace(current) || current == ',')
                        {
                            yield return(buffer.ToString());

                            buffer.Length = 0;
                            if (current == ',')
                            {
                                // stop current token, and send the , as well
                                yield return(",");
                            }

                            mode = TokenizerState.WhiteSpace;
                        }
                        else
                        {
                            // A bracket or quote inside a plain token continues the same token
                            // in the corresponding state.
                            if (isOpener)
                            {
                                mode  = TokenizerState.InParenthesis;
                                depth = 1;
                            }
                            else if (current == '\'')
                            {
                                mode = TokenizerState.Quoted;
                            }
                            buffer.Append(current);
                        }
                        break;

                    default:
                        throw new InvalidExpressionException("Could not understand the string " + original);
                    }
                }

                if (buffer.Length > 0)
                {
                    yield return(buffer.ToString());
                }
            }
 /// <summary>
 /// Convenience overload that forwards to the two-argument ReplaceState with a second
 /// argument of 1.
 /// NOTE(review): the second argument is presumably the number of states to replace -
 /// confirm against that overload.
 /// </summary>
 /// <param name="state">The state passed through to the two-argument overload.</param>
 /// <returns>The action produced by the two-argument overload.</returns>
 internal static Action<Stack<TokenizerState>> ReplaceState(TokenizerState state)
 {
     Action<Stack<TokenizerState>> replaceAction = ReplaceState(state, 1);
     return replaceAction;
 }
Example #59
0
        /// <summary>
        /// Reads from the underlying reader until one token is complete and returns it. Characters
        /// are collected until IsTerm reports a terminal character. If text was collected before
        /// the terminal, the text is returned first and the terminal is left for the next call;
        /// otherwise the terminal itself is returned. Text still buffered when the stream ends is
        /// returned as a final string token, after which the end token is returned.
        /// </summary>
        /// <returns>A string token, a terminal token, or the end token.</returns>
        /// <exception cref="ObjectDisposedException">The tokenizer has been disposed.</exception>
        /// <exception cref="Exception">
        /// The tokenizer is in a state that has no branch below (for example Failed).
        /// </exception>
        public CsvToken GetToken()
        {
            if (disposed)
            {
                throw new ObjectDisposedException("tokenizer");
            }

            // Created lazily: whether it exists tells the end-of-stream branch if this call
            // has already started a token.
            StringBuilder buffer = null;

            for (;;)
            {
                switch (state)
                {
                case TokenizerState.Unknown:
                {
                    // First read: prime the look-ahead character.
                    input = reader.Read();

                    if (EndOfStream == input)
                    {
                        state = TokenizerState.EndOfDocument;
                    }
                    else
                    {
                        TextPosition = TextPosition.Begin();
                        state = TokenizerState.Reading;
                    }

                    break;
                }

                case TokenizerState.FlushLastToken:
                {
                    if (buffer == null || buffer.Length <= 0)
                    {
                        state = TokenizerState.Failed;
                        break;
                    }

                    state = TokenizerState.EndOfDocument;
                    return(CsvToken.String(buffer.ToString()));
                }

                case TokenizerState.EndOfDocument:
                {
                    return(CsvToken.End);
                }

                case TokenizerState.Reading:
                {
                    if (EndOfStream == input)
                    {
                        state = buffer == null ? TokenizerState.EndOfDocument : TokenizerState.FlushLastToken;
                        break;
                    }

                    if (buffer == null)
                    {
                        buffer = new StringBuilder();
                    }

                    var current = (char)input;

                    if (!IsTerm(current))
                    {
                        // Ordinary character: collect it and advance the look-ahead.
                        buffer.Append(current);
                        input = reader.Read();
                        break;
                    }

                    if (buffer.Length > 0)
                    {
                        // The terminal stays in the look-ahead and is returned by the next call.
                        return(CsvToken.String(buffer.ToString()));
                    }

                    input = reader.Read();

                    return(CsvToken.Terminal(current));
                }

                default:
                {
                    throw new Exception();
                }
                }
            }
        }
            public IEnumerable<Token> GetTokens()
            {
                if (this.position >= this.value.Length)
                {
                    yield break;
                }
                int readCount = 0;
                bool readCompleted = false;
                string errorMessage = null;
                while (!readCompleted)
                {
                    switch (this.currentState)
                    {
                        case TokenizerState.ReadyToReadKey:
                            {
                                if (this.position >= this.value.Length)
                                {
                                    errorMessage = "Unexpected string end in '{0}' state.".FormatInvariant(this.currentState);
                                    this.currentState = TokenizerState.Error;
                                    break;
                                }
                                char currentChar = this.value[this.position];
                                switch (currentChar)
                                {
                                    case '=':
                                    case '&':
                                        errorMessage = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(currentChar, this.currentState);
                                        this.currentState = TokenizerState.Error;
                                        break;
                                    case '/':
                                        this.currentState = TokenizerState.Finish;
                                        break;
                                    default:
                                        readCount++;
                                        this.currentState = TokenizerState.ReadKey;
                                        break;
                                }
                                break;
                            }
                        case TokenizerState.ReadKey:
                            {
                                if (this.position >= this.value.Length)
                                {
                                    yield return this.CreateToken(TokenType.Key, readCount);
                                    yield return this.CreateToken(TokenType.Value, 0);
                                    readCount = 0;
                                    this.currentState = TokenizerState.Finish;
                                    break;
                                }
                                char currentChar = this.value[this.position];
                                switch (currentChar)
                                {
                                    case '=':
                                        yield return this.CreateToken(TokenType.Key, readCount);
                                        readCount = 0;
                                        this.currentState = TokenizerState.ReadValue;
                                        break;
                                    case '&':
                                        yield return this.CreateToken(TokenType.Key, readCount);
                                        yield return this.CreateToken(TokenType.Value, 0);
                                        readCount = 0;
                                        this.currentState = TokenizerState.ReadyToReadKey;
                                        break;
                                    case '/':
                                        yield return this.CreateToken(TokenType.Key, readCount);
                                        yield return this.CreateToken(TokenType.Value, 0);
                                        readCount = 0;
                                        this.currentState = TokenizerState.Finish;
                                        break;
                                    default:
                                        readCount++;
                                        //this.currentState = TokenizerState.ReadKey;
                                        break;
                                }
                                break;
                            }
                        case TokenizerState.ReadValue:
                            {
                                if (this.position >= this.value.Length)
                                {
                                    yield return this.CreateToken(TokenType.Value, readCount);
                                    readCount = 0;
                                    this.currentState = TokenizerState.Finish;
                                    break;
                                }
                                char currentChar = this.value[this.position];
                                switch (currentChar)
                                {
                                    case '=':
                                        errorMessage = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(currentChar, this.currentState);
                                        this.currentState = TokenizerState.Error;
                                        break;
                                    case '&':
                                        yield return this.CreateToken(TokenType.Value, readCount);
                                        readCount = 0;
                                        this.currentState = TokenizerState.ReadyToReadKey;
                                        break;
                                    case '/':
                                        yield return this.CreateToken(TokenType.Value, readCount);
                                        readCount = 0;
                                        this.currentState = TokenizerState.Finish;
                                        break;
                                    default:
                                        readCount++;
                                        //this.currentState = TokenizerState.ReadValue;
                                        break;
                                }
                                break;
                            }
                        case TokenizerState.Finish:
                        case TokenizerState.Error:
                            readCompleted = true;
                            break;
                        default:
                            throw new NotSupportedException();
                    }
                    this.position++;
                }

                if (this.currentState == TokenizerState.Error)
                {
                    throw new FormatException(errorMessage);
                }
            }