C# (CSharp) TokenizerState Examples

Programming Language: C# (CSharp)

Class/Type: TokenizerState

Examples at hotexamples.com: 60

C# (CSharp) TokenizerState - 60 examples found. These are the top-rated real-world C# (CSharp) examples of TokenizerState, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

CreateInfo(1)

GetChar(1)

IsEndOfStream(1)

NextChar(1)

NextLine(1)

Example #1

Show file

File: IterationData.cs Project: mburgman101/CsQuery

        /// <summary>
        /// Readies this iteration object for the next sibling so that the same instance can be
        /// reused instead of allocating a new one. The tokenizer state and insertion mode go back
        /// to their defaults, the literal start marker moves up to the current position, and the
        /// current element is cleared. The parent is not touched.
        /// </summary>

        public void Reset()
        {
            this.TokenizerState = TokenizerState.Default;
            this.HtmlStart = this.Pos;
            this.InsertionMode = InsertionMode.Default;
            this.Element = null;
        }

Example #2

Show file

File: IterationData.cs Project: jeiea/osuDownloader

        /// <summary>
        /// Builds a text node from the HTML between HtmlStart and Pos, if there is any, and either
        /// attaches it to the parent element or hands it back to the caller. When WrapLiterals is
        /// set the text node is wrapped in a span element first.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory whose Html source the text is read from.
        /// </param>
        /// <param name="literal">
        /// [out] The node that was created (the wrapping span when WrapLiterals is set), or null
        /// when there is no text between HtmlStart and Pos.
        /// </param>
        ///
        /// <returns>
        /// true only when a node was created and there is no parent to attach it to; the tokenizer
        /// state is then set to Finished. false when there is no text, or when the node was appended
        /// to the parent's element and this object was reset.
        /// </returns>

        public bool TryGetLiteral(HtmlElementFactory factory, out IDomObject literal)
        {
            if (Pos <= HtmlStart)
            {
                // Nothing lies between the end of the last tag and the current position.
                literal = null;
                return false;
            }

            // Pick the node type that matches the current insertion mode.
            DomText textNode;

            if (InsertionMode == InsertionMode.Invalid)
            {
                textNode = new DomInvalidElement();
            }
            else if (InsertionMode == InsertionMode.Text)
            {
                // Text mode only applies to a single literal, so drop back to the default mode.
                InsertionMode = InsertionMode.Default;
                textNode = new DomInnerText();
            }
            else
            {
                textNode = new DomText();
            }

            literal = textNode;
            literal.NodeValue = HtmlData.HtmlDecode(factory.Html.SubstringBetween(HtmlStart, Pos));

            if (WrapLiterals)
            {
                DomElement span = DomElement.Create("span");
                span.AppendChildUnsafe(literal);
                literal = span;
            }

            if (Parent == null)
            {
                // No parent to attach to: the caller receives the node.
                TokenizerState = TokenizerState.Finished;
                return true;
            }

            ((DomElement)Parent.Element).AppendChildUnsafe(literal);
            Reset();
            return false;
        }

Example #3

Show file

            /// <summary>
            /// Feeds one character to the token processor that is currently active in the sequence
            /// and moves on to the next processor whenever the active one reports Success.
            /// </summary>
            /// <param name="c">The character to process.</param>
            /// <param name="fullExpression">Passed through unchanged to the token processors.</param>
            /// <param name="currentIndex">
            /// Passed through to the token processors; presumably the index of <paramref name="c"/>
            /// within <paramref name="fullExpression"/>.
            /// </param>
            /// <returns>
            /// Success when the last processor in the sequence has succeeded, Fail when the active
            /// processor reports Fail, and Valid in every other case.
            /// </returns>
            public TokenizerState ProcessChar(char c, string fullExpression, int currentIndex)
            {
                TokenizerState result = _tokenProcessors[_current].ProcessChar(c, fullExpression, currentIndex);

                if (result == TokenizerState.Success)
                {
                    _current++;

                    bool sequenceExhausted = (_current == _tokenProcessors.Length);
                    if (sequenceExhausted)
                    {
                        return TokenizerState.Success;
                    }

                    // Record where the next processor starts, relative to the first index seen,
                    // then offer it the very same character.
                    _startIndexes[_current] = currentIndex - _firstIndex;

                    var next = _tokenProcessors[_current];
                    next.ResetState();
                    result = next.ProcessChar(c, fullExpression, currentIndex);
                }

                if (result == TokenizerState.Fail)
                {
                    return TokenizerState.Fail;
                }

                // Remember the first index handled while still on the first processor.
                if (_firstIndex < 0 && _current == 0)
                {
                    _firstIndex = currentIndex;
                }

                return TokenizerState.Valid;
            }

Example #4

Show file

File: IterationData.cs Project: kaleb/CsQuery

        /// <summary>
        /// Resets the per-tag fields of this iterator object so it can be reused for the next
        /// sibling rather than being recreated. The parent is left as it is.
        /// </summary>

        public void Reset()
        {
            // Back to the default tokenizer state.
            TokenizerState = TokenizerState.Default;

            // Any following literal text starts at the current position.
            HtmlStart = Pos;

            // Back to the default insertion mode.
            InsertionMode = InsertionMode.Default;

            // No element is in progress any more.
            Element = null;
        }

Example #5

Show file

        /// <summary>
        /// Maps a tokenizer state to a flag: true for Indentation, Identifier, Number and
        /// NewLineCR; false for Begin and String.
        /// </summary>
        /// <param name="token">Not used by this method.</param>
        /// <param name="state">The tokenizer state to classify.</param>
        /// <returns>The flag for <paramref name="state"/> as listed in the summary.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="state"/> is not one of the six states listed in the summary.
        /// </exception>
        private bool TryFillTokenIfValidAtInputEnd(Token token, TokenizerState state)
        {
            switch (state)
            {
            case TokenizerState.Indentation:
            case TokenizerState.Identifier:
            case TokenizerState.Number:
            case TokenizerState.NewLineCR:
                return true;

            case TokenizerState.Begin:
            case TokenizerState.String:
                return false;

            default:
                throw new ArgumentException($"Unexpected state for Tokenizer: '{state}'");
            }
        }

Example #6

Show file

File: TokenException.cs Project: rslijp/sharptiles

 /// <summary>
 /// Creates the partial exception that reports an invalid tokenizer state.
 /// </summary>
 /// <param name="c">The character shown in parentheses in the message.</param>
 /// <param name="index">The position shown in the message.</param>
 /// <param name="state">The tokenizer state shown in the message.</param>
 /// <returns>The result of MakePartial for a new TokenException carrying the message.</returns>
 public static PartialExceptionWithContext<TokenException> IllegalStateAt(char c, int index, TokenizerState state)
 {
     // Placeholders: {0} = character, {1} = position, {2} = state.
     return MakePartial(
         new TokenException(
             String.Format(
                 "Tokenizer got invalid state at position {1}({0}). Tokenizer was in state {2}",
                 c,
                 index,
                 state)));
 }

Example #7

Show file

File: Tokenizer.cs Project: detlefgrohs/CSasic

 /// <summary>
 /// Finishes the token in progress: stamps it with the given type, adds it to the token list,
 /// puts the tokenizer state back to Default and starts a fresh, empty Unknown token.
 /// </summary>
 /// <param name="tokenType">The type to assign to the token being finished.</param>
 private void EndToken(TokenType tokenType)
 {
     // Finish and store the token in progress.
     _currentToken.TokenType = tokenType;
     _tokens.Add(_currentToken);

     // Start over with a clean state and an empty token.
     _currentTokenizerState = TokenizerState.Default;
     _currentToken = new Token(TokenType.Unknown, string.Empty);
 }

Example #8

Show file

File: XamlTokenizer.cs Project: VlaTo/Libra-Xaml-Styler

        /// <summary>
        /// Creates a tokenizer over the given reader, starting at line 1, character 1 and in the
        /// NotStarted state.
        /// </summary>
        /// <param name="reader">The text source to tokenize; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public XamlTokenizer(TextReader reader)
        {
            // Fail here rather than on the first read, where the cause would be harder to trace.
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            this.reader = reader;

            lineNumber = 1;
            charPosition = 1;
            state = TokenizerState.NotStarted;
        }

Example #9

Show file

File: Tokenizer.Emit.cs Project: prepare/WebParser

 /// <summary>
 /// Either appends the string buffer to the long string buffer or emits it, depending on
 /// whether the return state has any of the DATA_AND_RCDATA_MASK bits set.
 /// </summary>
 /// <param name="returnState">The tokenizer state that decides between appending and emitting.</param>
 void EmitOrAppendStrBuf(TokenizerState returnState)
 {
     // NOTE(review): the original carried a commented-out variant of this test that compared
     // against "!= 0" without the byte cast. Confirm against the definition of
     // DATA_AND_RCDATA_MASK that "== 0 means append" is the intended polarity.
     bool maskedBitsClear = ((byte)returnState & DATA_AND_RCDATA_MASK) == 0;

     if (!maskedBitsClear)
     {
         EmitStrBuf();
         return;
     }

     AppendStrBufToLongStrBuf();
 }

Example #10

Show file

File: TokenizerStateChange.cs Project: tathamoddie/RegexAnalyzer

        /// <summary>
        /// Builds an action that pops the top entry off a state stack and then pushes
        /// <paramref name="state"/> onto it <paramref name="pushCount"/> times.
        /// </summary>
        /// <param name="state">The state to push.</param>
        /// <param name="pushCount">How many times to push the state; must be at least 1.</param>
        /// <returns>The action to run against a state stack.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="pushCount"/> is zero or negative.
        /// </exception>
        internal static Action<Stack<TokenizerState>> ReplaceState(TokenizerState state, short pushCount)
        {
            if (pushCount < 1)
            {
                throw new ArgumentOutOfRangeException("pushCount", pushCount, "Must be a positive number.");
            }

            return stack =>
            {
                stack.Pop();

                int remaining = pushCount;
                while (remaining > 0)
                {
                    stack.Push(state);
                    remaining--;
                }
            };
        }

Example #11

Show file

File: TokenEnumerator.cs Project: rolandzwaga/sharptiles

 /// <summary>
 /// Sets the current state from the Literal() and Seperator() checks: Literal wins, then
 /// Seperator, and Normal is used when neither check succeeds.
 /// </summary>
 private void DetermineState()
 {
     // Seperator() is only consulted when Literal() returns false.
     _state = Literal()
         ? TokenizerState.Literal
         : (Seperator() ? TokenizerState.Seperator : TokenizerState.Normal);
 }

Example #12

Show file

        /// <summary>
        /// Passes one character to the wrapped token processor, unless an earlier call has
        /// already produced a result other than Valid.
        /// </summary>
        /// <param name="c">The character to process.</param>
        /// <param name="fullExpression">Passed through unchanged to the token processor.</param>
        /// <param name="currentIndex">Passed through unchanged to the token processor.</param>
        /// <returns>
        /// Fail when this instance is no longer valid; otherwise whatever the token processor
        /// returns for this character.
        /// </returns>
        public TokenizerState Feed(char c, string fullExpression, int currentIndex)
        {
            if (_stillValid)
            {
                TokenizerState result = _tokenProcessor.ProcessChar(c, fullExpression, currentIndex);

                // Anything other than Valid (Success included) ends this instance's run.
                if (result != TokenizerState.Valid)
                {
                    _stillValid = false;
                }

                return result;
            }

            return TokenizerState.Fail;
        }

Example #13

Show file

File: IterationData.cs Project: kaleb/CsQuery

        /// <summary>
        /// Closes out the current element. Any pending literal text is handled first via
        /// TryGetLiteral; then, if there is a parent, the parent is reset to the current position
        /// and this object's tokenizer state becomes Finished.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory to operate against; it is handed to TryGetLiteral.
        /// </param>
        ///
        /// <returns>
        /// A lazily evaluated sequence holding the literal when TryGetLiteral returns true,
        /// followed by the parent's element when the parent itself has no parent. The sequence may
        /// be empty. Because this is an iterator, none of the work happens until it is enumerated.
        /// </returns>

        public IEnumerable <IDomObject> CloseElement(HtmlElementFactory factory)
        {
            IDomObject pendingLiteral;

            if (TryGetLiteral(factory, out pendingLiteral))
            {
                yield return pendingLiteral;
            }

            if (Parent == null)
            {
                yield break;
            }

            // Only a parent at the top of the hierarchy is handed to the caller.
            if (Parent.Parent == null)
            {
                yield return Parent.Element;
            }

            Parent.Reset(Pos);
            TokenizerState = TokenizerState.Finished;
        }

Example #14

Show file

File: ParserException.cs Project: fireflycons/Invoke-SqlExecute

        /// <summary>
        /// Builds the exception that corresponds to an invalid tokenizer state.
        /// </summary>
        /// <param name="tokenizerState">The state the tokenizer was in.</param>
        /// <param name="batchSource">The batch source passed on to the exception.</param>
        /// <returns>
        /// An <see cref="UnclosedStringLiteralException"/> for either string state, an
        /// <see cref="UnclosedBlockCommentException"/> for the block comment state, and a plain
        /// <see cref="ParserException"/> naming the state otherwise.
        /// </returns>
        internal static ParserException CreateInvalidTokenizerStateException(
            TokenizerState tokenizerState,
            IBatchSource batchSource)
        {
            bool insideString = tokenizerState == TokenizerState.SingleQuoteString
                                || tokenizerState == TokenizerState.DoubleQuoteString;

            if (insideString)
            {
                return new UnclosedStringLiteralException(tokenizerState, batchSource);
            }

            if (tokenizerState == TokenizerState.BlockComment)
            {
                return new UnclosedBlockCommentException(tokenizerState, batchSource);
            }

            return new ParserException($"Unexpected state {tokenizerState}", batchSource);
        }

Example #15

Show file

File: Tokenizer.ConnectionString.cs Project: srijken/turtlebuild

        /// <summary>
        /// Tries to parse a connection string of ';'-separated "name=value" groups into a new
        /// instance of <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">The type that receives the parsed values.</typeparam>
        /// <param name="connectionString">The connection string to parse.</param>
        /// <param name="args">Tokenizer settings (case sensitivity, skipping of unknown items).</param>
        /// <param name="to">
        /// [out] The populated instance on success; null when the method returns false.
        /// </param>
        /// <returns>
        /// true when every group was either evaluated or skipped; false as soon as a group does
        /// not split into exactly two parts or names an unknown token while
        /// args.SkipUnknownNamedItems is off.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="connectionString"/> or <paramref name="args"/> is null.
        /// </exception>
        public static bool TryParseConnectionString <T>(string connectionString, TokenizerArgs args, out T to)
            where T : class, new()
        {
            if (connectionString == null)
            {
                throw new ArgumentNullException("connectionString");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            to = null;

            using (TokenizerState <T> state = NewState <T>(args))
            {
                IList <string> pairs = GetWords(connectionString, new string[] { "\"\"", "\'\'" }, '\0', EscapeMode.DoubleItem, ";".ToCharArray());

                foreach (string pair in pairs)
                {
                    IList <string> nameAndValue = GetWords(pair, new string[] { "\"\"", "\'\'" }, '\0', EscapeMode.DoubleItem, "=".ToCharArray());

                    TokenItem token;
                    if ((nameAndValue.Count == 2) && state.Definition.TryGetToken(nameAndValue[0], args.CaseSensitive, out token))
                    {
                        token.Evaluate(nameAndValue[1], state);
                        continue;
                    }

                    // Malformed or unknown item: tolerated only when the caller asked for that.
                    if (!args.SkipUnknownNamedItems)
                    {
                        return false;
                    }
                }
                // TODO: Parse connectionstring using definition

                to = state.Instance;
                return true;
            }
        }

Example #16

Show file

File: ParserException.cs Project: fireflycons/Invoke-SqlExecute

        /// <summary>
        /// Produces the error text for a tokenizer state at end of file.
        /// </summary>
        /// <param name="state">The tokenizer state to describe.</param>
        /// <returns>
        /// A fixed message for the block comment and the two string states, and a generic message
        /// that includes the state for anything else.
        /// </returns>
        private static string FormatTokenizerStateError(TokenizerState state)
        {
            string message;

            switch (state)
            {
            case TokenizerState.SingleQuoteString:
                message = "Unclosed single-quote string at end of file";
                break;

            case TokenizerState.DoubleQuoteString:
                message = "Unclosed double-quote string at end of file";
                break;

            case TokenizerState.BlockComment:
                message = "Unclosed block comment at end of file";
                break;

            default:
                message = $"Unexpected state at end of file (should not be an error): {state}";
                break;
            }

            return message;
        }

Example #17

Show file

File: AnyOfMatcher.cs Project: viciproject/core

            /// <summary>
            /// Offers the character to every token processor and combines their results.
            /// </summary>
            /// <param name="c">The character to process.</param>
            /// <param name="fullExpression">Passed through unchanged to each processor.</param>
            /// <param name="currentIndex">Passed through unchanged to each processor.</param>
            /// <returns>
            /// Success if at least one processor reports Success; otherwise Valid if at least one
            /// reports Valid; otherwise Fail.
            /// </returns>
            public TokenizerState ProcessChar(char c, string fullExpression, int currentIndex)
            {
                bool anySuccess = false;
                bool anyValid = false;

                // Every processor sees the character; there is deliberately no early exit.
                foreach (ITokenProcessor candidate in _tokenProcessors)
                {
                    TokenizerState outcome = candidate.ProcessChar(c, fullExpression, currentIndex);

                    if (outcome == TokenizerState.Success)
                    {
                        anySuccess = true;
                    }
                    else if (outcome == TokenizerState.Valid)
                    {
                        anyValid = true;
                    }
                }

                if (anySuccess)
                {
                    return TokenizerState.Success;
                }

                return anyValid ? TokenizerState.Valid : TokenizerState.Fail;
            }

Example #18

Show file

File: Tokenizer.ConnectionString.cs Project: srijken/turtlebuild

        /// <summary>
        /// Tries to populate a new instance of <typeparamref name="T"/> from the entries of a
        /// name/value collection.
        /// </summary>
        /// <typeparam name="T">The type that receives the parsed values.</typeparam>
        /// <param name="collection">The collection whose keys name tokens and whose values are evaluated.</param>
        /// <param name="args">Tokenizer settings (case sensitivity, skipping of unknown items).</param>
        /// <param name="to">
        /// [out] The populated instance on success; null when the method returns false.
        /// </param>
        /// <returns>
        /// true when every entry was either evaluated or skipped; false as soon as a key names an
        /// unknown token while args.SkipUnknownNamedItems is off.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="collection"/> or <paramref name="args"/> is null.
        /// </exception>
        public static bool TryParseNameValueCollection <T>(NameValueCollection collection, TokenizerArgs args, out T to)
            where T : class, new()
        {
            if (collection == null)
            {
                throw new ArgumentNullException("collection");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            to = null;

            using (TokenizerState <T> state = NewState <T>(args))
            {
                for (int index = 0; index < collection.Count; index++)
                {
                    TokenItem item;

                    if (state.Definition.TryGetToken(collection.Keys[index], args.CaseSensitive, out item))
                    {
                        item.Evaluate(collection[index], state);
                    }
                    else if (!args.SkipUnknownNamedItems)
                    {
                        // Unknown key and the caller did not ask for such keys to be ignored.
                        return false;
                    }
                }

                to = state.Instance;
                return true;
            }
        }

Example #19

Show file

File: Tokenizer.ConnectionString.cs Project: srijken/turtlebuild

        /// <summary>
        /// Tries to populate a new instance of <typeparamref name="T"/> from the key/value pairs
        /// of a string dictionary.
        /// </summary>
        /// <typeparam name="T">The type that receives the parsed values.</typeparam>
        /// <param name="collection">The dictionary whose keys name tokens and whose values are evaluated.</param>
        /// <param name="args">Tokenizer settings (case sensitivity, skipping of unknown items).</param>
        /// <param name="to">
        /// [out] The populated instance on success; null when the method returns false.
        /// </param>
        /// <returns>
        /// true when every pair was either evaluated or skipped; false as soon as a key names an
        /// unknown token while args.SkipUnknownNamedItems is off.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="collection"/> or <paramref name="args"/> is null.
        /// </exception>
        public static bool TryParseNameValueCollection <T>(IDictionary <string, string> collection, TokenizerArgs args, out T to)
            where T : class, new()
        {
            if (collection == null)
            {
                throw new ArgumentNullException("collection");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            to = null;

            using (TokenizerState <T> state = NewState <T>(args))
            {
                foreach (KeyValuePair <string, string> entry in collection)
                {
                    TokenItem item;

                    if (state.Definition.TryGetToken(entry.Key, args.CaseSensitive, out item))
                    {
                        item.Evaluate(entry.Value, state);
                    }
                    else if (!args.SkipUnknownNamedItems)
                    {
                        // Unknown key and the caller did not ask for such keys to be ignored.
                        return false;
                    }
                }

                to = state.Instance;
                return true;
            }
        }

Example #20

Show file

            /// <summary>
            /// Walks <c>value</c> from <c>position</c> one character at a time and yields Key and
            /// Value tokens. '=' ends a key, '&amp;' ends a key/value pair and '/' ends the input.
            /// A key that is not followed by '=' is reported together with a Value token created
            /// with a count of 0.
            /// </summary>
            /// <returns>
            /// A lazily evaluated sequence of tokens; nothing is yielded when <c>position</c> is
            /// already at or past the end of <c>value</c>.
            /// </returns>
            /// <exception cref="FormatException">
            /// The state machine ended in the Error state. This is thrown after the loop, so tokens
            /// yielded before the error have already been handed to the caller.
            /// </exception>
            /// <exception cref="NotSupportedException">
            /// <c>currentState</c> holds a value this method has no case for.
            /// </exception>
            public IEnumerable <Token> GetTokens()
            {
                if (position >= value.Length)
                {
                    yield break;
                }
                // Number of characters collected for the token in progress.
                int    readCount     = 0;
                bool   readCompleted = false;
                string errorMessage  = null;

                while (!readCompleted)
                {
                    switch (currentState)
                    {
                    case TokenizerState.ReadyToReadKey:
                    {
                        // A key must follow. Running out of input here is an error, which means
                        // input that ends right after '&' is rejected.
                        if (position >= value.Length)
                        {
                            errorMessage = "Unexpected string end in '{0}' state.".FormatInvariant(currentState);
                            currentState = TokenizerState.Error;
                            break;
                        }
                        char currentChar = value[position];
                        switch (currentChar)
                        {
                        case '=':
                        case '&':
                            // A key cannot start with a separator.
                            errorMessage = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(currentChar, currentState);
                            currentState = TokenizerState.Error;
                            break;

                        case '/':
                            currentState = TokenizerState.Finish;
                            break;

                        default:
                            // First character of a key.
                            readCount++;
                            currentState = TokenizerState.ReadKey;
                            break;
                        }
                        break;
                    }

                    case TokenizerState.ReadKey:
                    {
                        if (position >= value.Length)
                        {
                            // End of input inside a key: report the key with a zero-count value.
                            yield return(CreateToken(TokenType.Key, readCount));

                            yield return(CreateToken(TokenType.Value, 0));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;
                        }
                        char currentChar = value[position];
                        switch (currentChar)
                        {
                        case '=':
                            // Key complete; its value follows.
                            yield return(CreateToken(TokenType.Key, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.ReadValue;
                            break;

                        case '&':
                            // Key without '=': report it with a zero-count value, then expect
                            // another key.
                            yield return(CreateToken(TokenType.Key, readCount));

                            yield return(CreateToken(TokenType.Value, 0));

                            readCount    = 0;
                            currentState = TokenizerState.ReadyToReadKey;
                            break;

                        case '/':
                            // Key without '=' followed by the terminator.
                            yield return(CreateToken(TokenType.Key, readCount));

                            yield return(CreateToken(TokenType.Value, 0));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;

                        default:
                            readCount++;
                            break;
                        }
                        break;
                    }

                    case TokenizerState.ReadValue:
                    {
                        if (position >= value.Length)
                        {
                            // End of input inside a value: report what was collected.
                            yield return(CreateToken(TokenType.Value, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;
                        }
                        char currentChar = value[position];
                        switch (currentChar)
                        {
                        case '=':
                            // A second '=' inside a value is not accepted.
                            errorMessage = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(currentChar, currentState);
                            currentState = TokenizerState.Error;
                            break;

                        case '&':
                            yield return(CreateToken(TokenType.Value, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.ReadyToReadKey;
                            break;

                        case '/':
                            yield return(CreateToken(TokenType.Value, readCount));

                            readCount    = 0;
                            currentState = TokenizerState.Finish;
                            break;

                        default:
                            readCount++;
                            break;
                        }
                        break;
                    }

                    case TokenizerState.Finish:
                    case TokenizerState.Error:
                        readCompleted = true;
                        break;

                    default:
                        throw new NotSupportedException();
                    }
                    // Runs after every pass, including the pass that handles Finish or Error.
                    // NOTE(review): CreateToken presumably relies on this position — confirm.
                    position++;
                }

                if (currentState == TokenizerState.Error)
                {
                    throw new FormatException(errorMessage);
                }
            }

Example #21

Show file

File: XamlTokenizer.cs Project: VlaTo/Libra-Xaml-Styler

        /// <summary>
        /// Reads characters from the underlying reader until a token can be returned.
        /// </summary>
        /// <returns>
        /// An EndOfStream token once the reader is exhausted, or a Terminal token for the next
        /// '=', '&lt;' or '&gt;' character.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The tokenizer is in a state this method cannot process. Only NotStarted and EndOfStream
        /// are handled; previously any other state (including the HeadingWhitespaces and Terminal
        /// states set by this method itself) made the loop spin forever.
        /// </exception>
        public XamlToken ReadNextToken()
        {
            while (true)
            {
                switch (state)
                {
                    case TokenizerState.EndOfStream:
                    {
                        return new XamlToken(XamlTokenType.EndOfStream, lineNumber, charPosition);
                    }

                    case TokenizerState.NotStarted:
                    {
                        var current = ReadNextChar();

                        // ReadNextChar reports the end of the input with -1.
                        if (-1 == current)
                        {
                            state = TokenizerState.EndOfStream;
                            continue;
                        }

                        if (Char.IsWhiteSpace((char) current))
                        {
                            state = TokenizerState.HeadingWhitespaces;
                            continue;
                        }

                        switch (current)
                        {
                            case '=':
                            case '<':
                            case '>':
                            {
                                state = TokenizerState.Terminal;
                                return new XamlToken(XamlTokenType.Terminal, ((char) current).ToString(), lineNumber, charPosition);
                            }
                        }

                        // Any other character is skipped and the next one is read, as before.
                        break;
                    }

                    default:
                    {
                        // No handler exists for this state; fail loudly instead of looping forever.
                        throw new InvalidOperationException(
                            String.Format("XamlTokenizer cannot read a token while in state '{0}'.", state));
                    }
                }
            }
        }

Example #22

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		/// <summary>
		/// Copies the tokenizer state of <paramref name="other"/> into this instance. Both
		/// character buffers are copied (growing the local ones when they are too small), plain
		/// fields are assigned directly, and the tag name, attribute name and attributes are
		/// cloned when present. shouldSuspend is always reset to false.
		/// </summary>
		/// <param name="other">The tokenizer whose state is loaded.</param>
		public void LoadState(Tokenizer other)
		{
			// Buffer.BlockCopy counts bytes and a char takes two, hence the "<< 1".
			strBufLen = other.strBufLen;
			if (strBuf.Length < strBufLen)
			{
				strBuf = new char[strBufLen];
			}
			Buffer.BlockCopy(other.strBuf, 0, strBuf, 0, strBufLen << 1);

			longStrBufLen = other.longStrBufLen;
			if (longStrBuf.Length < longStrBufLen)
			{
				longStrBuf = new char[longStrBufLen];
			}
			Buffer.BlockCopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen << 1);

			stateSave = other.stateSave;
			returnStateSave = other.returnStateSave;
			endTagExpectation = other.endTagExpectation;
			endTagExpectationAsArray = other.endTagExpectationAsArray;
			// line = 1; XXX line numbers
			lastCR = other.lastCR;
			index = other.index;
			forceQuirks = other.forceQuirks;
			additional = other.additional;
			entCol = other.entCol;
			firstCharKey = other.firstCharKey;
			lo = other.lo;
			hi = other.hi;
			candidate = other.candidate;
			strBufMark = other.strBufMark;
			prevValue = other.prevValue;
			value = other.value;
			seenDigits = other.seenDigits;
			endTag = other.endTag;
			shouldSuspend = false;

			// These three are taken over as they are; a null source simply yields null.
			doctypeName = other.doctypeName;
			systemIdentifier = other.systemIdentifier;
			publicIdentifier = other.publicIdentifier;

			// These three are cloned rather than shared, and stay null when the source is null.
			tagName = (other.tagName == null) ? null : other.tagName.CloneElementName();
			attributeName = (other.attributeName == null) ? null : other.attributeName.CloneAttributeName();
			attributes = (other.attributes == null) ? null : other.attributes.CloneAttributes();
		}

Example #23

Show file

        /// <summary>
        /// Splits a character stream into base tokens: a minus token for each '-' outside any
        /// delimiters, a text token for each "..." section (a backslash takes the following
        /// character literally), and content tokens for each (...) and [...] section. Any other
        /// character outside a delimited section, whitespace included, is ignored.
        /// </summary>
        /// <param name="charEnumer">The characters to tokenize.</param>
        /// <returns>
        /// A lazily evaluated enumerator of tokens; no input is consumed until it is advanced.
        /// </returns>
        /// <exception cref="UnexpectedEndOfInputError">
        /// The input ended inside a quoted, round-bracket or square-bracket section, or directly
        /// after a backslash. Because this is an iterator, the exception surfaces from MoveNext
        /// once the input is exhausted.
        /// </exception>
        public IEnumerator <BaseToken> GetBaseTokens(IEnumerator <char> charEnumer)
        {
            TokenizerState state = TokenizerState.ReadNothing;

            // Characters collected for the delimited section currently being read.
            string currentToken = "";

            while (charEnumer.MoveNext())
            {
                char c = charEnumer.Current;

                switch (state)
                {
                case TokenizerState.ReadNothing:
                {
                    if (c == '-')
                    {
                        yield return(new BaseToken(BaseTokenType.minus, "-"));
                    }
                    else if (c == '"')
                    {
                        state = TokenizerState.ReadQuote;
                    }
                    else if (c == '(')
                    {
                        state = TokenizerState.ReadRound;
                    }
                    else if (c == '[')
                    {
                        state = TokenizerState.ReadSquare;
                    }

                    // Everything else, whitespace included, is skipped.
                    break;
                }

                case TokenizerState.ReadQuote:
                {
                    if (c == '\\')
                    {
                        state = TokenizerState.ReadSlashInText;
                    }
                    else if (c == '"')
                    {
                        yield return(new BaseToken(BaseTokenType.text, currentToken));

                        currentToken = "";
                        state        = TokenizerState.ReadNothing;
                    }
                    else
                    {
                        currentToken += c;
                    }
                    break;
                }

                case TokenizerState.ReadSlashInText:
                {
                    // The character after a backslash is taken literally.
                    currentToken += c;
                    state         = TokenizerState.ReadQuote;
                    break;
                }

                case TokenizerState.ReadRound:
                {
                    if (c == ')')
                    {
                        yield return(new BaseToken(BaseTokenType.rndBrktContent, currentToken));

                        currentToken = "";
                        state        = TokenizerState.ReadNothing;
                    }
                    else
                    {
                        currentToken += c;
                    }
                    break;
                }

                case TokenizerState.ReadSquare:
                {
                    if (c == ']')
                    {
                        yield return(new BaseToken(BaseTokenType.sqrBrktContent, currentToken));

                        currentToken = "";
                        state        = TokenizerState.ReadNothing;
                    }
                    else
                    {
                        currentToken += c;
                    }
                    break;
                }
                }
            }

            // Input exhausted: every state except ReadNothing means a section was left open.
            switch (state)
            {
            case TokenizerState.ReadQuote:
                throw new UnexpectedEndOfInputError("Met end of input while reading text, expected (\")");

            case TokenizerState.ReadSlashInText:
                throw new UnexpectedEndOfInputError("Met end of input after reading \"\\\", expected symbol");

            case TokenizerState.ReadRound:
                throw new UnexpectedEndOfInputError("Met end of input while reading round bracket content, expected \")\"");

            case TokenizerState.ReadSquare:
                throw new UnexpectedEndOfInputError("Met end of input while reading square bracket content, expected \"]\"");
            }
        }

Example #24

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		/// <summary>
		/// Turns the numeric character reference accumulated in <c>value</c> into one or two
		/// UTF-16 code units and passes them, together with <paramref name="returnState"/>, to
		/// EmitOrAppendOne / EmitOrAppendTwo. Problematic code points are reported through the
		/// Err* helpers and, for zero, surrogates and out-of-range values, replaced by
		/// REPLACEMENT_CHARACTER.
		/// </summary>
		/// <param name="returnState">
		/// The tokenizer state handed on unchanged to the EmitOrAppend* helpers.
		/// </param>
		private void HandleNcrValue(TokenizerState returnState)
		{
			/*
			 * If one or more characters match the range, then take them all and
			 * interpret the string of characters as a number (either hexadecimal or
			 * decimal as appropriate).
			 */
			// Values up to 0xFFFF fit into a single UTF-16 code unit.
			if (value <= 0xFFFF)
			{
				if (value >= 0x80 && value <= 0x9f)
				{
					/*
					 * If that number is one of the numbers in the first column of
					 * the following table, then this is a parse error.
					 */
					ErrNcrInC1Range();
					/*
					 * Find the row with that number in the first column, and return
					 * a character token for the Unicode character given in the
					 * second column of that row.
					 */
					char[] val = NamedCharacters.WINDOWS_1252[value - 0x80];
					EmitOrAppendOne(val, returnState);
					// [NOCPP[
				}
				else if (value == 0xC
					  && contentSpacePolicy != XmlViolationPolicy.Allow)
				{
					// Form feed under a policy other than Allow.
					// NOTE(review): if the policy is neither AlterInfoset nor Fatal, nothing is
					// emitted for this reference — confirm that no such policy value exists.
					if (contentSpacePolicy == XmlViolationPolicy.AlterInfoset)
					{
						EmitOrAppendOne(SPACE, returnState);
					}
					else if (contentSpacePolicy == XmlViolationPolicy.Fatal)
					{
						Fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
					}
					// ]NOCPP]
				}
				else if (value == 0x0)
				{
					ErrNcrZero();
					EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
				}
				else if ((value & 0xF800) == 0xD800)
				{
					// 0xD800-0xDFFF: the surrogate range.
					ErrNcrSurrogate();
					EmitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
				}
				else
				{
					/*
					 * Otherwise, return a character token for the Unicode character
					 * whose code point is that number.
					 */
					char ch = (char)value;
					// [NOCPP[
					if (value == 0x0D)
					{
						ErrNcrCr();
					}
					else if ((value <= 0x0008) || (value == 0x000B)
						  || (value >= 0x000E && value <= 0x001F))
					{
						// The error helper may substitute a different character.
						ch = ErrNcrControlChar(ch);
					}
					else if (value >= 0xFDD0 && value <= 0xFDEF)
					{
						ErrNcrUnassigned();
					}
					else if ((value & 0xFFFE) == 0xFFFE)
					{
						// 0xFFFE and 0xFFFF; the error helper may substitute a different character.
						ch = ErrNcrNonCharacter(ch);
					}
					else if (value >= 0x007F && value <= 0x009F)
					{
						// Only 0x7F can arrive here: 0x80-0x9F was handled by the first branch.
						ErrNcrControlChar();
					}
					else
					{
						MaybeWarnPrivateUse(ch);
					}
					// ]NOCPP]
					bmpChar[0] = ch;
					EmitOrAppendOne(bmpChar, returnState);
				}
			}
			else if (value <= 0x10FFFF)
			{
				// Above 0xFFFF but within the Unicode range: emitted as two code units.
				// [NOCPP[
				MaybeWarnPrivateUseAstral();
				if ((value & 0xFFFE) == 0xFFFE)
				{
					ErrAstralNonCharacter(value);
				}
				// ]NOCPP]
				// NOTE(review): LEAD_OFFSET is presumably 0xD800 - (0x10000 >> 10), which would
				// make this the standard surrogate-pair split — confirm against its definition.
				astralChar[0] = (char)(LEAD_OFFSET + (value >> 10));
				astralChar[1] = (char)(0xDC00 + (value & 0x3FF));
				EmitOrAppendTwo(astralChar, returnState);
			}
			else
			{
				// Beyond 0x10FFFF there is no such code point.
				ErrNcrOutOfRange();
				EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
			}
		}

Example #25

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		/// <summary>
		/// Routes a single character either to the long string buffer or to
		/// the token handler, depending on which bits of
		/// <paramref name="returnState"/> survive <c>DATA_AND_RCDATA_MASK</c>.
		/// </summary>
		/// <param name="val">Source array; only element 0 is used.</param>
		/// <param name="returnState">State whose byte value is tested against the mask.</param>
		private void EmitOrAppendOne(char[] val, TokenizerState returnState)
		{
			// NOTE(review): the original carried a commented-out variant of this
			// test written as "(returnState & DATA_AND_RCDATA_MASK) != 0". The
			// active behaviour (append when the masked bits are all zero) is
			// kept as-is; confirm it against the mask's actual value.
			bool anyMaskedBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;
			if (anyMaskedBitSet)
			{
				TokenHandler.Characters(val, 0, 1);
				return;
			}

			AppendLongStrBuf(val[0]);
		}

Example #26

Show file

File: Tokenizer.StateLoop3_ScriptData.cs Project: prepare/WebParser

        void StateLoop3_ScriptData(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were an LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {
                   
                    // XXX reorder point
                    case TokenizerState.s06_SCRIPT_DATA:
                        /*scriptdataloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data less-than sign state.
                                         */
                                        FlushChars();
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s17_SCRIPT_DATA_LESS_THAN_SIGN;
                                        goto breakScriptdataloop; // FALL THRU continue
                                    // stateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * script data state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataloop:
                            goto case TokenizerState.s17_SCRIPT_DATA_LESS_THAN_SIGN;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s17_SCRIPT_DATA_LESS_THAN_SIGN:
                        /*scriptdatalessthansignloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Set the temporary buffer
                                         * to the empty string. Switch to the script
                                         * data end tag open state.
                                         */
                                        index = 0;
                                        ClearStrBuf();
                                        //state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
                                        state = TokenizerState.NON_DATA_END_TAG_NAME;
                                        goto continueStateloop;
                                    case '!':
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
                                        state = TokenizerState.s20_SCRIPT_DATA_ESCAPE_START;

                                        goto breakScriptdatalessthansignloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    default:
                                        /*
                                         * Otherwise, emit a U+003C LESS-THAN SIGN
                                         * character token
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        /*
                                         * and reconsume the current input character in
                                         * the data state.
                                         */
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatalessthansignloop:
                            goto case TokenizerState.s20_SCRIPT_DATA_ESCAPE_START;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s20_SCRIPT_DATA_ESCAPE_START:
                        /*scriptdataescapestartloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escape start dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
                                        state = TokenizerState.s21_SCRIPT_DATA_ESCAPE_START_DASH;
                                        goto breakScriptdataescapestartloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapestartloop:
                            goto case TokenizerState.s21_SCRIPT_DATA_ESCAPE_START_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s21_SCRIPT_DATA_ESCAPE_START_DASH:
                        /*scriptdataescapestartdashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escaped dash dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
                                        state = TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH;
                                        goto breakScriptdataescapestartdashloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapestartdashloop:
                            goto case TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH:
                        /*scriptdataescapeddashdashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Stay in the
                                         * script data escaped dash dash state.
                                         */
                                        continue;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data escaped less-than sign state.
                                         */
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit a U+003E
                                         * GREATER-THAN SIGN character token. Switch to
                                         * the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakScriptdataescapeddashdashloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakScriptdataescapeddashdashloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapeddashdashloop:
                            goto case TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s22_SCRIPT_DATA_ESCAPED:
                        /*scriptdataescapedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escaped dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
                                        state = TokenizerState.s23_SCRIPT_DATA_ESCAPED_DASH;
                                        goto breakScriptdataescapedloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data escaped less-than sign state.
                                         */
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * script data escaped state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapedloop:
                            goto case TokenizerState.s23_SCRIPT_DATA_ESCAPED_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s23_SCRIPT_DATA_ESCAPED_DASH:
                        /*scriptdataescapeddashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data escaped dash dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
                                        state = TokenizerState.s24_SCRIPT_DATA_ESCAPED_DASH_DASH;
                                        goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * script data escaped less-than sign state.
                                         */
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                                        goto breakScriptdataescapeddashloop;
                                    // goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapeddashloop:
                            goto case TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s25_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
                        /*scriptdataescapedlessthanloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Set the temporary buffer
                                         * to the empty string. Switch to the script
                                         * data escaped end tag open state.
                                         */
                                        index = 0;
                                        ClearStrBuf();
                                        returnState = TokenizerState.s22_SCRIPT_DATA_ESCAPED;

                                        //state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
                                        state = TokenizerState.NON_DATA_END_TAG_NAME;
                                        goto continueStateloop;
                                    case 'S':
                                    case 's':
                                        /*
                                         * U+0041 LATIN CAPITAL LETTER A through to
                                         * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
                                         * LESS-THAN SIGN character token and the
                                         * current input character as a character token.
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        reader.StartCollect();
                                        index = 1;
                                        /*
                                         * Set the temporary buffer to the empty string.
                                         * Append the lowercase TokenizerState.version of the current
                                         * input character (add 0x0020 to the
                                         * character's code point) to the temporary
                                         * buffer. Switch to the script data double
                                         * escape start state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
                                        state = TokenizerState.s28_SCRIPT_DATA_DOUBLE_ESCAPE_START;
                                        goto breakScriptdataescapedlessthanloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Emit a U+003C LESS-THAN SIGN
                                         * character token and reconsume the current
                                         * input character in the script data escaped
                                         * state.
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        reader.StartCollect();
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdataescapedlessthanloop:
                            goto case TokenizerState.s28_SCRIPT_DATA_DOUBLE_ESCAPE_START;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s28_SCRIPT_DATA_DOUBLE_ESCAPE_START:
                        /*scriptdatadoubleescapestartloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                Debug.Assert(index > 0);
                                if (index < 6)
                                {
                                    // SCRIPT_ARR.Length
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        //make it lower case 
                                        folded += (char)0x20;
                                    }
                                    if (folded != Tokenizer.SCRIPT_ARR[index])
                                    {
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                switch (c)
                                {
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                    case '/':
                                    case '>':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
                                         * (>) Emit the current input character as a
                                         * character token. If the temporary buffer is
                                         * the string "script", then switch to the
                                         * script data double escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakScriptdatadoubleescapestartloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data escaped state.
                                         */
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapestartloop:
                            goto case TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED:
                        /*scriptdatadoubleescapedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data double escaped dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
                                        state = TokenizerState.s30_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
                                        goto breakScriptdatadoubleescapedloop; // FALL THRU
                                    // continue
                                    // stateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Emit a U+003C
                                         * LESS-THAN SIGN character token. Switch to the
                                         * script data double escaped less-than sign
                                         * state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * script data double escaped state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapedloop:
                            goto case TokenizerState.s30_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s30_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
                        /*scriptdatadoubleescapeddashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Switch to the
                                         * script data double escaped dash dash state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
                                        state = TokenizerState.s31_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
                                        goto breakScriptdatadoubleescapeddashloop;
                                    // goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Emit a U+003C
                                         * LESS-THAN SIGN character token. Switch to the
                                         * script data double escaped less-than sign
                                         * state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data double escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapeddashloop:
                            goto case TokenizerState.s31_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s31_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
                        /*scriptdatadoubleescapeddashdashloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Emit a U+002D
                                         * HYPHEN-MINUS character token. Stay in the
                                         * script data double escaped dash dash state.
                                         */
                                        continue;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Emit a U+003C
                                         * LESS-THAN SIGN character token. Switch to the
                                         * script data double escaped less-than sign
                                         * state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                                        goto breakScriptdatadoubleescapeddashdashloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit a U+003E
                                         * GREATER-THAN SIGN character token. Switch to
                                         * the script data state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
                                        state = TokenizerState.s06_SCRIPT_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Switch to the
                                         * script data double escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapeddashdashloop:
                            goto case TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s32_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
                        /*scriptdatadoubleescapedlessthanloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
                                         * character token. Set the temporary buffer to
                                         * the empty string. Switch to the script data
                                         * double escape end state.
                                         */
                                        index = 0;
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
                                        state = TokenizerState.s33_SCRIPT_DATA_DOUBLE_ESCAPE_END;
                                        goto breakScriptdatadoubleescapedlessthanloop;
                                    default:
                                        /*
                                         * Anything else Reconsume the current input
                                         * character in the script data double escaped
                                         * state.
                                         */
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakScriptdatadoubleescapedlessthanloop:
                            goto case TokenizerState.s33_SCRIPT_DATA_DOUBLE_ESCAPE_END;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s33_SCRIPT_DATA_DOUBLE_ESCAPE_END:
                        /*scriptdatadoubleescapeendloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                if (index < 6)
                                {
                                    // SCRIPT_ARR.Length
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded != Tokenizer.SCRIPT_ARR[index])
                                    {
                                        reader.StepBack();
                                        //reconsume = true;
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                switch (c)
                                {
                                    case '\r':
                                        EmitCarriageReturn();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                    case '/':
                                    case '>':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
                                         * (>) Emit the current input character as a
                                         * character token. If the temporary buffer is
                                         * the string "script", then switch to the
                                         * script data escaped state.
                                         */
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s22_SCRIPT_DATA_ESCAPED;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Reconsume the current input character in the
                                         * script data double escaped state.
                                         */
                                        //reconsume = true;
                                        reader.StepBack();
                                        //state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
                                        state = TokenizerState.s29_SCRIPT_DATA_DOUBLE_ESCAPED;
                                        goto continueStateloop;
                                }
                            }
                        }
                        //------------------------------------
                        //eof
                        goto breakStateloop; 
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

        breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }

Example #27

Show file

File: Tokenizer.Emit.cs Project: prepare/WebParser

 /// <summary>
 /// Sends the first character of <paramref name="val"/> either to the long string
 /// buffer or directly to the token listener, chosen by testing
 /// <paramref name="returnState"/> against DATA_AND_RCDATA_MASK.
 /// </summary>
 /// <param name="val">Character array; only element 0 is consumed.</param>
 /// <param name="returnState">State whose byte value is masked to pick the destination.</param>
 void EmitOrAppendOne(char[] val, TokenizerState returnState)
 {
     bool anyMaskBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;

     if (anyMaskBitSet)
     {
         // At least one masked bit is set: hand the character to the listener.
         TokenListener.Characters(val, 0, 1);
         return;
     }

     // No masked bit is set: collect the character in the long string buffer.
     AppendLongStrBuf(val[0]);
 }

Example #28

Show file

 /// <summary>
 /// Creates an <see cref="UnclosedStringLiteralException"/>; both arguments are
 /// forwarded unchanged to the base-class constructor and no further work is done here.
 /// </summary>
 /// <param name="tokenizerState">Tokenizer state handed to the base constructor.</param>
 /// <param name="batchSource">Batch source handed to the base constructor.</param>
 internal UnclosedStringLiteralException(TokenizerState tokenizerState, IBatchSource batchSource) : base(tokenizerState, batchSource) { }

Example #29

Show file

File: Tokenizer.cs Project: detlefgrohs/CSasic

 /// <summary>
 /// Records <paramref name="tokenizerState"/> as the current tokenizer state and
 /// appends <paramref name="character"/> to the text of the current token.
 /// </summary>
 /// <param name="tokenizerState">State to store in the current-state field.</param>
 /// <param name="character">Text to append; defaults to the empty string.</param>
 private void StartToken(TokenizerState tokenizerState, string character = "")
 {
     _currentTokenizerState = tokenizerState;
     _currentToken.Text = _currentToken.Text + character;
 }

Example #30

Show file

        /// <summary>
        /// Writes the token members of <paramref name="instance"/> to <paramref name="writer"/>
        /// in two passes: first single-valued token members as XML attributes, then every
        /// remaining member as child elements (token groups preferred, plain tokens as fallback).
        /// </summary>
        /// <param name="writer">Destination XML writer.</param>
        /// <param name="instance">Object whose members are written.</param>
        /// <param name="args">Tokenizer settings used to create the state and cloned for nested groups.</param>
        /// <returns>
        /// false as soon as a token group's TryWriteXml call fails; true otherwise.
        /// </returns>
        /// <exception cref="ArgumentNullException">Any of the three arguments is null.</exception>
        internal static bool TryWrite(XmlWriter writer, T instance, TokenizerArgs args)
        {
            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            if (instance == null)
            {
                throw new ArgumentNullException("instance");
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            // Members claimed by the attribute pass; the element pass skips them.
            Hashtable attributeMembers = new Hashtable();

            using (TokenizerState<T> state = Tokenizer.NewState<T>(args, instance))
            {
                // Pass 1: members that have tokens, no groups, and exactly one value
                // are emitted as attributes.
                foreach (TokenMember member in state.Definition.AllTokenMembers)
                {
                    object[] memberValues = member.GetValues(state);

                    bool attributeCandidate =
                        member.Tokens.Count > 0 &&
                        member.Groups.Count <= 0 &&
                        memberValues != null &&
                        memberValues.Length == 1;

                    if (!attributeCandidate)
                    {
                        continue;
                    }

                    // The member is marked before the value is inspected, so a member
                    // with a null value is still excluded from pass 2.
                    attributeMembers[member] = member;

                    object single = memberValues[0];
                    if (single == null)
                    {
                        continue;
                    }

                    Type valueType = single.GetType();

                    foreach (TokenItem ti in member.Tokens)
                    {
                        bool usable = ti.Name != null
                            && (ti.ValueType == null || ti.ValueType.IsAssignableFrom(valueType));

                        if (usable)
                        {
                            // Will throw if multiple times written -> Definition bug, resolve there
                            writer.WriteAttributeString(ti.Name, ti.GetStringValue(single, state));
                            break;
                        }
                    }
                }

                // Pass 2: everything not claimed above goes out as elements.
                foreach (TokenMember member in state.Definition.AllTokenMembers)
                {
                    if (attributeMembers.Contains(member))
                    {
                        continue;
                    }

                    object[] memberValues = member.GetValues(state);
                    if (memberValues == null || memberValues.Length == 0)
                    {
                        continue;
                    }

                    foreach (object value in memberValues)
                    {
                        if (value == null)
                        {
                            continue;
                        }

                        Type valueType = value.GetType();
                        bool wroteGroup = false;

                        // Prefer the first group whose value type accepts this value.
                        foreach (TokenGroupItem tg in member.Groups)
                        {
                            if (tg.ValueType == null || tg.ValueType.IsAssignableFrom(valueType))
                            {
                                writer.WriteStartElement(tg.Name);

                                // Will throw if multiple times written -> Definition bug, resolve there
                                if (!tg.TryWriteXml(writer, args.Clone(state.Instance), value))
                                {
                                    return false;
                                }

                                writer.WriteEndElement();
                                wroteGroup = true;
                                break;
                            }
                        }

                        if (wroteGroup)
                        {
                            continue;
                        }

                        // No group took the value: fall back to the first matching named token.
                        foreach (TokenItem ti in member.Tokens)
                        {
                            bool usable = ti.Name != null
                                && (ti.ValueType == null || ti.ValueType.IsAssignableFrom(valueType));

                            if (usable)
                            {
                                // Will throw if multiple times written -> Definition bug, resolve there
                                writer.WriteElementString(ti.Name, ti.GetStringValue(value, state));
                                break;
                            }
                        }
                    }
                }
            }

            return true;
        }

Example #31

Show file

        /// <summary>
        /// Attempts to populate a new <typeparamref name="T"/> from the attributes and
        /// child elements of an XML node.
        /// </summary>
        /// <param name="element">Node to read; a navigator is obtained from it.</param>
        /// <param name="args">
        /// Tokenizer settings; CaseSensitive controls name lookup and SkipUnknownNamedItems
        /// decides whether an unrecognised attribute or element aborts the parse.
        /// </param>
        /// <param name="to">Receives the populated instance on success; null when the method returns false.</param>
        /// <returns>
        /// true when every attribute and child element was handled (or skipped);
        /// false when an unknown name is met while SkipUnknownNamedItems is off, or when a
        /// token group fails to parse its element.
        /// </returns>
        internal static bool TryParse(IXPathNavigable element, TokenizerArgs args, out T to)
        {
            XPathNavigator nav = element.CreateNavigator();

            to = null;
            using (TokenizerState<T> state = Tokenizer.NewState<T>(args))
            {
                if (nav.MoveToFirstAttribute())
                {
                    do
                    {
                        TokenItem ti;

                        if (!state.Definition.TryGetToken(nav.LocalName, args.CaseSensitive, out ti))
                        {
                            if (args.SkipUnknownNamedItems)
                            {
                                // 'continue' in a do/while jumps to the loop condition,
                                // i.e. on to the next attribute.
                                continue;
                            }
                            else
                            {
                                return(false);
                            }
                        }

                        ti.Evaluate(nav.Value, state);
                    }while (nav.MoveToNextAttribute());

                    // Return from the attribute axis to the owning element.
                    nav.MoveToParent();
                }

                // Enter the child axis on the first *element* child. Using an untyped
                // first-child move here would land on a leading comment, text or
                // processing-instruction node, which would then be looked up by its
                // (empty or unrelated) local name and rejected as an unknown item, even
                // though all later siblings are already filtered to elements.
                if (nav.MoveToChild(XPathNodeType.Element))
                {
                    do
                    {
                        string name = nav.LocalName;

                        TokenGroupItem group;
                        TokenItem      ti;
                        if (state.Definition.TryGetGroup(name, args.CaseSensitive, out group))
                        {
                            object value;

                            if (!group.TryParseXml(nav, args.Clone(state.Instance), out value))
                            {
                                return(false);
                            }

                            group.Member.SetValue(state, value);
                        }
                        else if (state.Definition.TryGetToken(name, args.CaseSensitive, out ti))
                        {
                            // Allow tokens as element
                            ti.Evaluate(nav.Value, state);
                        }
                        else if (!args.SkipUnknownNamedItems)
                        {
                            return(false);
                        }
                    }while (nav.MoveToNext(XPathNodeType.Element));
                }

                to = state.Instance;
                return(true);
            }
        }

Example #32

Show file

File: FunctionTokenizer.cs Project: f1nalspace/final_game_tech

        /// <summary>
        /// Scans <paramref name="buffer"/> from start to end and returns the tokens
        /// accumulated in a fresh <c>TokenizerOutput</c>.
        /// </summary>
        /// <remarks>
        /// Whitespace other than '\n' is skipped. '\n' triggers NextLine() followed by
        /// NextChar(). '(', ')', ',' and '*' are added as symbols; a run starting with a
        /// letter or '_' and continuing with letters, digits or '_' is added as an
        /// identifier; any other character is added via AddChar.
        /// </remarks>
        /// <param name="buffer">Text to tokenize.</param>
        /// <returns>The <c>Tokens</c> list of the output collector.</returns>
        public static List<Token> Tokenize(string buffer)
        {
            TokenizerOutput output = new TokenizerOutput();
            TokenizerState  cursor = new TokenizerState(buffer);

            while (!cursor.IsEndOfStream())
            {
                // Skip blanks, but stop on a line feed so it can be handled below.
                while (!cursor.IsEndOfStream())
                {
                    char blank = cursor.GetChar();
                    if (blank == '\n' || !char.IsWhiteSpace(blank))
                    {
                        break;
                    }
                    cursor.NextChar();
                }

                if (cursor.IsEndOfStream())
                {
                    break;
                }

                char c = cursor.GetChar();

                if (c == '\n')
                {
                    cursor.NextLine();
                    cursor.NextChar();
                }
                else if (c == '(')
                {
                    output.AddSymbol(TokenType.BraceBegin, c, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (c == ')')
                {
                    output.AddSymbol(TokenType.BraceEnd, c, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (c == ',')
                {
                    output.AddSymbol(TokenType.ArgumentSeparator, c, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (c == '*')
                {
                    output.AddSymbol(TokenType.Pointer, c, cursor.CreateInfo());
                    cursor.NextChar();
                }
                else if (char.IsLetter(c) || c == '_')
                {
                    int firstPos = cursor.BufferPos;

                    // Consume the whole identifier run.
                    while (!cursor.IsEndOfStream())
                    {
                        char part = cursor.GetChar();
                        if (!char.IsLetterOrDigit(part) && part != '_')
                        {
                            break;
                        }
                        cursor.NextChar();
                    }

                    string ident = buffer.Substring(firstPos, cursor.BufferPos - firstPos);

                    // NOTE(review): CreateInfo() is called after the identifier has been
                    // consumed, whereas every other branch calls it before advancing.
                    // Confirm whether the info is meant to describe the token's end here.
                    output.AddIdent(ident, cursor.CreateInfo());
                }
                else
                {
                    output.AddChar(c, cursor.CreateInfo());
                    cursor.NextChar();
                }
            }

            return output.Tokens;
        }

Example #33

Show file

File: Tokenizer.cs Project: prepare/WebParser

        // ]NOCPP]

        // For the token handler to call
        /**
         * Stores the given tokenizer state and, unless that state is
         * TokenizerState.s01_DATA, the end tag name that goes with it. This
         * should only ever be used to put the tokenizer into one of the states
         * that have a special end tag expectation.
         * 
         * @param specialTokenizerState
         *            the tokenizer state to set
         * @param endTagExpectation
         *            the expected end tag for transitioning back to normal;
         *            not read when the state is TokenizerState.s01_DATA
         */
        public void SetStateAndEndTagExpectation(TokenizerState specialTokenizerState,
                [Local] String endTagExpectation)
        {
            this.stateSave = specialTokenizerState;
            if (specialTokenizerState != TokenizerState.s01_DATA)
            {
                // Resolve the element name from the tag's characters, then refresh the array form.
                char[] expectedName = endTagExpectation.ToCharArray();
                this.endTagExpectation = ElementName.ElementNameByBuffer(expectedName);
                EndTagExpectationToArray();
            }
        }

Example #34

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		private int StateLoop(TokenizerState state, char c,
                int pos, char[] buf, bool reconsume, TokenizerState returnState,
				int endPos)
		{
			/*
			 * Idioms used in this code:
			 * 
			 * 
			 * Consuming the next input character
			 * 
			 * To consume the next input character, the code does this: if (++pos ==
			 * endPos) { goto breakStateloop; } c = buf[pos];
			 * 
			 * 
			 * Staying in a state
			 * 
			 * When there's a state that the tokenizer may stay in over multiple
			 * input characters, the state has a wrapper |for(;;)| loop and staying
			 * in the state continues the loop.
			 * 
			 * 
			 * Switching to another state
			 * 
			 * To switch to another state, the code sets the state variable to the
			 * magic number of the new state. Then it either continues stateloop or
			 * breaks out of the state's own wrapper loop if the target state is
			 * right after the current state in source order. (This is a partial
			 * workaround for Java's lack of goto.)
			 * 
			 * 
			 * Reconsume support
			 * 
			 * The spec sometimes says that an input character is reconsumed in
			 * another state. If a state can ever be entered so that an input
			 * character can be reconsumed in it, the state's code starts with an
			 * |if (reconsume)| that sets reconsume to false and skips over the
			 * normal code for consuming a new character.
			 * 
			 * To reconsume the current character in another state, the code sets
			 * |reconsume| to true and then switches to the other state.
			 * 
			 * 
			 * Emitting character tokens
			 * 
			 * This method emits character tokens lazily. Whenever a new range of
			 * character tokens starts, the field cstart must be set to the start
			 * index of the range. The flushChars() method must be called at the end
			 * of a range to flush it.
			 * 
			 * 
			 * U+0000 handling
			 * 
			 * The various states have to handle the replacement of U+0000 with
			 * U+FFFD. However, if U+0000 would be reconsumed in another state, the
			 * replacement doesn't need to happen, because it's handled by the
			 * reconsuming state.
			 * 
			 * 
			 * LF handling
			 * 
			 * Every state needs to increment the line number upon LF unless the LF
			 * gets reconsumed by another state which increments the line number.
			 * 
			 * 
			 * CR handling
			 * 
			 * Every state needs to handle CR unless the CR gets reconsumed and is
			 * handled by the reconsuming state. The CR needs to be handled as if it
			 * were an LF, the lastCR field must be set to true and then this
			 * method must return. The IO driver will then swallow the next
			 * character if it is an LF to coalesce CRLF.
			 */

			/*
			 * As there is no support for labeled loops in C#, instead of break <loop>;
			 * the port uses goto break<loop>; and a label after the loop.
			 * Instead of continue <loop>; it uses goto continue<loop>; and a label
			 * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
			 */

			/*stateloop:*/
			for (; ; )
			{
			continueStateloop:

				switch (state)
				{
					case TokenizerState.DATA:
						/*dataloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in data state.
									 */
									FlushChars(buf, pos);
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('\u0000');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

									goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the tag
									 * open state.
									 */
									FlushChars(buf, pos);

									//state = Transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
							        state = TokenizerState.TAG_OPEN;
									goto breakDataloop; // FALL THROUGH continue
								// stateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the input character as a
									 * character token.
									 * 
									 * Stay in the data state.
									 */
									continue;
							}
						}
					breakDataloop:
						goto case TokenizerState.TAG_OPEN;
					// WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
					case TokenizerState.TAG_OPEN:
						/*tagopenloop:*/
						for (; ; )
						{
							/*
							 * The behavior of this state depends on the content
							 * model flag.
							 */
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * If the content model flag is set to the PCDATA state
							 * Consume the next input character:
							 */
							if (c >= 'A' && c <= 'Z')
							{
								/*
								 * U+0041 LATIN CAPITAL LETTER A through to U+005A
								 * LATIN CAPITAL LETTER Z Create a new start tag
								 * token,
								 */
								endTag = false;
								/*
								 * set its tag name to the lowercase version of the
								 * input character (add 0x0020 to the character's
								 * code point),
								 */
								ClearStrBufAndAppend((char)(c + 0x20));
								/* then switch to the tag name state. */
								//state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                state = TokenizerState.TAG_NAME;
								/*
								 * (Don't emit the token yet; further details will
								 * be filled in before it is emitted.)
								 */
								goto breakTagopenloop;
								// goto continueStateloop;
							}
							else if (c >= 'a' && c <= 'z')
							{
								/*
								 * U+0061 LATIN SMALL LETTER A through to U+007A
								 * LATIN SMALL LETTER Z Create a new start tag
								 * token,
								 */
								endTag = false;
								/*
								 * set its tag name to the input character,
								 */
								ClearStrBufAndAppend(c);
								/* then switch to the tag name state. */
								//state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                state = TokenizerState.TAG_NAME;
								/*
								 * (Don't emit the token yet; further details will
								 * be filled in before it is emitted.)
								 */
								goto breakTagopenloop;
								// goto continueStateloop;
							}
							switch (c)
							{
								case '!':
									/*
									 * U+0021 EXCLAMATION MARK (!) Switch to the
									 * markup declaration open state.
									 */
									//state = Transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
							        state = TokenizerState.MARKUP_DECLARATION_OPEN;
									goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the close tag
									 * open state.
									 */
									//state = Transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
                                    state = TokenizerState.CLOSE_TAG_OPEN;
									goto continueStateloop;
								case '?':
									/*
									 * U+003F QUESTION MARK (?) Parse error.
									 */
									ErrProcessingInstruction();
									/*
									 * Switch to the bogus comment state.
									 */
									ClearLongStrBufAndAppend(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrLtGt();
									/*
									 * Emit a U+003C LESS-THAN SIGN character token
									 * and a U+003E GREATER-THAN SIGN character
									 * token.
									 */
									TokenHandler.Characters(LT_GT, 0, 2);
									/* Switch to the data state. */
									cstart = pos + 1;
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									/*
									 * Anything else Parse error.
									 */
									ErrBadCharAfterLt(c);
									/*
									 * Emit a U+003C LESS-THAN SIGN character token
									 */
									TokenHandler.Characters(LT_GT, 0, 1);
									/*
									 * and reconsume the current input character in
									 * the data state.
									 */
									cstart = pos;
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakTagopenloop:
						goto case TokenizerState.TAG_NAME;
					// FALL THROUGH DON'T REORDER
					case TokenizerState.TAG_NAME:
						/*tagnameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									StrBufToElementNameString();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
									state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before attribute name state.
									 */
									StrBufToElementNameString();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakTagnameloop;
								// goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									StrBufToElementNameString();
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									StrBufToElementNameString();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto default;
								// fall thru
								default:
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Append the
										 * lowercase version of the current input
										 * character (add 0x0020 to the character's
										 * code point) to the current tag token's
										 * tag name.
										 */
										c += (char)0x20;
									}
									/*
									 * Anything else Append the current input
									 * character to the current tag token's tag
									 * name.
									 */
									AppendStrBuf(c);
									/*
									 * Stay in the tag name state.
									 */
									continue;
							}
						}
					breakTagnameloop:
						goto case TokenizerState.BEFORE_ATTRIBUTE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_ATTRIBUTE_NAME:
						/*beforeattributenameloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before attribute name state.
									 */
									continue;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;

									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto case '\"';
								case '\"':
								case '\'':
								case '<':
								case '=':
									/*
									 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
									 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
									 * SIGN (=) Parse error.
									 */
									ErrBadCharBeforeAttributeNameOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									/*
									 * Anything else Start a new attribute in the
									 * current tag token.
									 */
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Set that
										 * attribute's name to the lowercase version
										 * of the current input character (add
										 * 0x0020 to the character's code point)
										 */
										c += (char)0x20;
									}
									/*
									 * Set that attribute's name to the current
									 * input character,
									 */
									ClearStrBufAndAppend(c);
									/*
									 * and its value to the empty string.
									 */
									// Will do later.
									/*
									 * Switch to the attribute name state.
									 */
									//state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_NAME;
									goto breakBeforeattributenameloop;
								// goto continueStateloop;
							}
						}
					breakBeforeattributenameloop:
						goto case TokenizerState.ATTRIBUTE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.ATTRIBUTE_NAME:
						/*attributenameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									AttributeNameComplete();
									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the after attribute name state.
									 */
									AttributeNameComplete();
									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_NAME;
                                    goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									AttributeNameComplete();
									AddAttributeWithoutValue();
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto continueStateloop;
								case '=':
									/*
									 * U+003D EQUALS SIGN (=) Switch to the before
									 * attribute value state.
									 */
									AttributeNameComplete();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_VALUE;
									goto breakAttributenameloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									AttributeNameComplete();
									AddAttributeWithoutValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto case '\"';
								case '\"':
								case '\'':
								case '<':
									/*
									 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
									 * (') U+003C LESS-THAN SIGN (<) Parse error.
									 */
									ErrQuoteOrLtInAttributeNameOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Append the
										 * lowercase version of the current input
										 * character (add 0x0020 to the character's
										 * code point) to the current attribute's
										 * name.
										 */
										c += (char)0x20;
									}
									/*
									 * Anything else Append the current input
									 * character to the current attribute's name.
									 */
									AppendStrBuf(c);
									/*
									 * Stay in the attribute name state.
									 */
									continue;
							}
						}
					breakAttributenameloop:
						goto case TokenizerState.BEFORE_ATTRIBUTE_VALUE;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_ATTRIBUTE_VALUE:
						/*beforeattributevalueloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before attribute value state.
									 */
									continue;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the
									 * attribute value (double-quoted) state.
									 */
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_DOUBLE_QUOTED;

									goto breakBeforeattributevalueloop;
								// goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the attribute
									 * value (unquoted) state and reconsume this
									 * input character.
									 */
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_UNQUOTED;
									NoteUnquotedAttributeValue();
									reconsume = true;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the attribute
									 * value (single-quoted) state.
									 */
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrAttributeValueMissing();
									/*
									 * Emit the current tag token.
									 */
									AddAttributeWithoutValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto case '<';
								case '<':
								case '=':
								case '`':
									/*
									 * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
									 * (=) U+0060 GRAVE ACCENT (`)
									 */
									ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									// [NOCPP[
									ErrHtml4NonNameInUnquotedAttribute(c);
									// ]NOCPP]
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									ClearLongStrBufAndAppend(c);
									/*
									 * Switch to the attribute value (unquoted)
									 * state.
									 */

									//state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_VALUE_UNQUOTED;

									NoteUnquotedAttributeValue();
									goto continueStateloop;
							}
						}
					breakBeforeattributevalueloop:
						goto case TokenizerState.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.ATTRIBUTE_VALUE_DOUBLE_QUOTED:
						/*attributevaluedoublequotedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the after
									 * attribute value (quoted) state.
									 */
									AddAttributeWithValue();

									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED;
									goto breakAttributevaluedoublequotedloop;
								// goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in attribute value state, with the
									 * additional allowed character being U+0022
									 * QUOTATION MARK (").
									 */
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('\"');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the attribute value (double-quoted)
									 * state.
									 */
									continue;
							}
						}
					breakAttributevaluedoublequotedloop:
						goto case TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED:
						/*afterattributevaluequotedloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before attribute name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto continueStateloop;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto breakAfterattributevaluequotedloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								default:
									/*
									 * Anything else Parse error.
									 */
									ErrNoSpaceBetweenAttributes();
									/*
									 * Reconsume the character in the before
									 * attribute name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakAfterattributevaluequotedloop:
						goto case TokenizerState.SELF_CLOSING_START_TAG;
					// FALLTHRU DON'T REORDER
					case TokenizerState.SELF_CLOSING_START_TAG:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
                        c = buf[pos];
						/*
						 * Consume the next input character:
						 */
						switch (c)
						{
							case '>':
								/*
								 * U+003E GREATER-THAN SIGN (>) Set the self-closing
								 * flag of the current tag token. Emit the current
								 * tag token.
								 */
								// [NOCPP[
								ErrHtml4XmlVoidSyntax();
								// ]NOCPP]
								//state = Transition(state, EmitCurrentTagToken(true, pos), reconsume, pos);
						        state = EmitCurrentTagToken(true, pos);
								if (shouldSuspend)
								{
									goto breakStateloop;
								}
								/*
								 * Switch to the data state.
								 */
								goto continueStateloop;
							default:
								/* Anything else Parse error. */
								ErrSlashNotFollowedByGt();
								/*
								 * Reconsume the character in the before attribute
								 * name state.
								 */
								//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
						        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;

								reconsume = true;
								goto continueStateloop;
						}
					// XXX reorder point
					case TokenizerState.ATTRIBUTE_VALUE_UNQUOTED:
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									AddAttributeWithValue();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before attribute name state.
									 */
									AddAttributeWithValue();
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
									goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in attribute value state, with the
									 * additional allowed character being U+003E
									 * GREATER-THAN SIGN (>)
									 */
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('>');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									AddAttributeWithValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto case '<';
								// fall thru
								case '<':
								case '\"':
								case '\'':
								case '=':
								case '`':
									/*
									 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
									 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
									 * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
									 */
									ErrUnquotedAttributeValOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									// fall through
									goto default;
								default:
									// [NOCPP[
									ErrHtml4NonNameInUnquotedAttribute(c);
									// ]NOCPP]
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the attribute value (unquoted) state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.AFTER_ATTRIBUTE_NAME:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the after attribute name state.
									 */
									continue;
								case '/':
									/*
									 * U+002F SOLIDUS (/) Switch to the self-closing
									 * start tag state.
									 */
									AddAttributeWithoutValue();
									//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
							        state = TokenizerState.SELF_CLOSING_START_TAG;
									goto continueStateloop;
								case '=':
									/*
									 * U+003D EQUALS SIGN (=) Switch to the before
									 * attribute value state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
							        state = TokenizerState.BEFORE_ATTRIBUTE_VALUE;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * tag token.
									 */
									AddAttributeWithoutValue();
									//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
							        state = EmitCurrentTagToken(false, pos);
									if (shouldSuspend)
									{
										goto breakStateloop;
									}
									/*
									 * Switch to the data state.
									 */
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto case '\"';
								// fall thru
								case '\"':
								case '\'':
								case '<':
									ErrQuoteOrLtInAttributeNameOrNull(c);
									/*
									 * Treat it as per the "anything else" entry
									 * below.
									 */
									goto default;
								default:
									AddAttributeWithoutValue();
									/*
									 * Anything else Start a new attribute in the
									 * current tag token.
									 */
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Set that
										 * attribute's name to the lowercase version
										 * of the current input character (add
										 * 0x0020 to the character's code point)
										 */
										c += (char)0x20;
									}
									/*
									 * Set that attribute's name to the current
									 * input character,
									 */
									ClearStrBufAndAppend(c);
									/*
									 * and its value to the empty string.
									 */
									// Will do later.
									/*
									 * Switch to the attribute name state.
									 */
									//state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
							        state = TokenizerState.ATTRIBUTE_NAME;
									goto continueStateloop;
							}
						}
					// XXX reorder point
					case TokenizerState.MARKUP_DECLARATION_OPEN:
						/*markupdeclarationopenloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * If the next two characters are both U+002D
							 * HYPHEN-MINUS characters (-), consume those two
							 * characters, create a comment token whose data is the
							 * empty string, and switch to the comment start state.
							 * 
							 * Otherwise, if the next seven characters are an ASCII
							 * case-insensitive match for the word "DOCTYPE", then
							 * consume those characters and switch to the DOCTYPE
							 * state.
							 * 
							 * Otherwise, if the insertion mode is
							 * "in foreign content" and the current node is not an
							 * element in the HTML namespace and the next seven
							 * characters are a case-sensitive match for the string
							 * "[CDATA[" (the five uppercase letters "CDATA" with a
							 * U+005B LEFT SQUARE BRACKET character before and
							 * after), then consume those characters and switch to
							 * the CDATA section state.
							 * 
							 * Otherwise, this is a parse error. Switch to the bogus
							 * comment state. The next character that is consumed,
							 * if any, is the first character that will be in the
							 * comment.
							 */
							switch (c)
							{
								case '-':
									ClearLongStrBufAndAppend(c);
									//state = Transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
							        state = TokenizerState.MARKUP_DECLARATION_HYPHEN;
									goto breakMarkupdeclarationopenloop;
								// goto continueStateloop;
								case 'd':
								case 'D':
									ClearLongStrBufAndAppend(c);
									index = 0;
									//state = Transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
							        state = TokenizerState.MARKUP_DECLARATION_OCTYPE;
									goto continueStateloop;
								case '[':
									if (TokenHandler.IsCDataSectionAllowed)
									{
										ClearLongStrBufAndAppend(c);
										index = 0;
										//state = Transition(state, Tokenizer.CDATA_START, reconsume, pos);
									    state = TokenizerState.CDATA_START;
										goto continueStateloop;
									}
									else
									{
										// else fall through
										goto default;
									}
								default:
									ErrBogusComment();
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakMarkupdeclarationopenloop:
						goto case TokenizerState.MARKUP_DECLARATION_HYPHEN;
					// FALLTHRU DON'T REORDER
					case TokenizerState.MARKUP_DECLARATION_HYPHEN:
						/*markupdeclarationhyphenloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							switch (c)
							{
								case '\u0000':
									goto breakStateloop;
								case '-':
									ClearLongStrBuf();
									//state = Transition(state, Tokenizer.COMMENT_START, reconsume, pos);
							        state = TokenizerState.COMMENT_START;
									goto breakMarkupdeclarationhyphenloop;
								// goto continueStateloop;
								default:
									ErrBogusComment();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakMarkupdeclarationhyphenloop:
						goto case TokenizerState.COMMENT_START;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT_START:
						/*commentstartloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment start state
							 * 
							 * 
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Switch to the comment
									 * start dash state.
									 */
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
							        state = TokenizerState.COMMENT_START_DASH;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrPrematureEndOfComment();
									/* Emit the comment token. */
									EmitComment(0, pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;

									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									// state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;

									goto breakCommentstartloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the input character to
									 * the comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;

									goto breakCommentstartloop;
								// goto continueStateloop;
							}
						}
					breakCommentstartloop:
						goto case TokenizerState.COMMENT;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT:
						/*commentloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment state Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Switch to the comment
									 * end dash state
									 */
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
							        state = TokenizerState.COMMENT_END_DASH;
									goto breakCommentloop;
								// goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the input character to
									 * the comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the comment state.
									 */
									continue;
							}
						}
					breakCommentloop:
						goto case TokenizerState.COMMENT_END_DASH;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT_END_DASH:
						/*commentenddashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment end dash state Consume the next input
							 * character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Switch to the comment
									 * end state
									 */
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
							        state = TokenizerState.COMMENT_END;
									goto breakCommentenddashloop;
								// goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
							        state = TokenizerState.COMMENT;
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									goto default;
								// fall thru
								default:
									/*
									 * Anything else Append a U+002D HYPHEN-MINUS
									 * (-) character and the input character to the
									 * comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
							}
						}
					breakCommentenddashloop:
						goto case TokenizerState.COMMENT_END;
					// FALLTHRU DON'T REORDER
					case TokenizerState.COMMENT_END:
						/*commentendloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment end state Consume the next input
							 * character:
							 */
							switch (c)
							{
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the comment
									 * token.
									 */
									EmitComment(2, pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									/* U+002D HYPHEN-MINUS (-) Parse error. */
									/*
									 * Append a U+002D HYPHEN-MINUS (-) character to
									 * the comment token's data.
									 */
									AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
									/*
									 * Stay in the comment end state.
									 */
									continue;
								case '\r':
									AdjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto breakStateloop;
								case '\n':
									AdjustDoubleHyphenAndAppendToLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
								case '!':
									ErrHyphenHyphenBang();
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
							        state = TokenizerState.COMMENT_END_BANG;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Append two U+002D HYPHEN-MINUS (-) characters
									 * and the input character to the comment
									 * token's data.
									 */
									AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
							}
						}
					// XXX reorder point
					case TokenizerState.COMMENT_END_BANG:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							/*
							 * Comment end bang state
							 * 
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the comment
									 * token.
									 */
									EmitComment(3, pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									/*
									 * Append two U+002D HYPHEN-MINUS (-) characters
									 * and a U+0021 EXCLAMATION MARK (!) character
									 * to the comment token's data.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment end dash state.
									 */
									//state = Transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                                    state = TokenizerState.COMMENT_END_DASH;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append two U+002D HYPHEN-MINUS
									 * (-) characters, a U+0021 EXCLAMATION MARK (!)
									 * character, and the input character to the
									 * comment token's data. Switch to the comment
									 * state.
									 */
									AppendLongStrBuf(c);
									/*
									 * Switch to the comment state.
									 */
									//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.COMMENT;
									goto continueStateloop;
							}
						}
					// XXX reorder point
					case TokenizerState.COMMENT_START_DASH:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
                        c = buf[pos];
						/*
						 * Comment start dash state
						 * 
						 * Consume the next input character:
						 */
						switch (c)
						{
							case '-':
								/*
								 * U+002D HYPHEN-MINUS (-) Switch to the comment end
								 * state
								 */
								AppendLongStrBuf(c);
								//state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                                state = TokenizerState.COMMENT_END;
								goto continueStateloop;
							case '>':
								ErrPrematureEndOfComment();
								/* Emit the comment token. */
								EmitComment(1, pos);
								/*
								 * Switch to the data state.
								 */
								//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                state = TokenizerState.DATA;
								goto continueStateloop;
							case '\r':
								AppendLongStrBufCarriageReturn();
								//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                state = TokenizerState.COMMENT;
								goto breakStateloop;
							case '\n':
								AppendLongStrBufLineFeed();
								//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                state = TokenizerState.COMMENT;
								goto continueStateloop;
							case '\u0000':
								c = '\uFFFD';
								// fall thru
								goto default;
							default:
								/*
								 * Append a U+002D HYPHEN-MINUS character (-) and
								 * the current input character to the comment
								 * token's data.
								 */
								AppendLongStrBuf(c);
								/*
								 * Switch to the comment state.
								 */
								//state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                state = TokenizerState.COMMENT;
								goto continueStateloop;
						}
					// XXX reorder point
					case TokenizerState.CDATA_START:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							if (index < 6)
							{ // CDATA_LSQB.Length
								if (c == Tokenizer.CDATA_LSQB[index])
								{
									AppendLongStrBuf(c);
								}
								else
								{
									ErrBogusComment();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							else
							{
								cstart = pos; // start coalescing
								//state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                state = TokenizerState.CDATA_SECTION;
								reconsume = true;
								break; // FALL THROUGH goto continueStateloop;
							}
						}
						goto case TokenizerState.CDATA_SECTION;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CDATA_SECTION:
						/*cdatasectionloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							switch (c)
							{
								case ']':
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
							        state = TokenizerState.CDATA_RSQB;
									goto breakCdatasectionloop; // FALL THROUGH
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								// fall thru
								default:
									continue;
							}
						}
					breakCdatasectionloop:
						goto case TokenizerState.CDATA_RSQB;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CDATA_RSQB:
						/*cdatarsqb:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
                            c = buf[pos];
							switch (c)
							{
								case ']':
									//state = Transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
							        state = TokenizerState.CDATA_RSQB_RSQB;

									goto breakCdatarsqb;
								default:
									TokenHandler.Characters(Tokenizer.RSQB_RSQB, 0, 1);
									cstart = pos;
									//state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
							        state = TokenizerState.CDATA_SECTION;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakCdatarsqb:
						goto case TokenizerState.CDATA_RSQB_RSQB;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CDATA_RSQB_RSQB:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
                        c = buf[pos];
						switch (c)
						{
							case '>':
								cstart = pos + 1;
								//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                state = TokenizerState.DATA;
								goto continueStateloop;
							default:
								TokenHandler.Characters(Tokenizer.RSQB_RSQB, 0, 2);
								cstart = pos;
								//state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
						        state = TokenizerState.CDATA_SECTION;

								reconsume = true;
								goto continueStateloop;

						}
					// XXX reorder point
					case TokenizerState.ATTRIBUTE_VALUE_SINGLE_QUOTED:
						/*attributevaluesinglequotedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
                                c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the after
									 * attribute value (quoted) state.
									 */
									AddAttributeWithValue();

									//state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
							        state = TokenizerState.AFTER_ATTRIBUTE_VALUE_QUOTED;
									goto continueStateloop;
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in attribute value state, with the
									 * additional allowed character being U+0027
									 * APOSTROPHE (').
									 */
									ClearStrBufAndAppend(c);
									SetAdditionalAndRememberAmpersandLocation('\'');
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
									goto breakAttributevaluesinglequotedloop;
								// goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									goto default;
								// fall thru
								default:
									/*
									 * Anything else Append the current input
									 * character to the current attribute's value.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the attribute value (single-quoted)
									 * state.
									 */
									continue;
							}
						}
					breakAttributevaluesinglequotedloop:
						goto case TokenizerState.CONSUME_CHARACTER_REFERENCE;
					// FALLTHRU DON'T REORDER
					case TokenizerState.CONSUME_CHARACTER_REFERENCE:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
						c = buf[pos];
						if (c == '\u0000')
						{
							goto breakStateloop;
						}
						/*
						 * Unlike the definition in the spec, this state does not
						 * return a value and never requires the caller to
						 * backtrack. This state takes care of emitting characters
						 * or appending to the current attribute value. It also
						 * takes care of that in the case when consuming the
						 * character reference fails.
						 */
						/*
						 * This section defines how to consume a character
						 * reference. This definition is used when parsing character
						 * references in text and in attributes.
						 * 
						 * The behavior depends on the identity of the next
						 * character (the one immediately after the U+0026 AMPERSAND
						 * character):
						 */
						switch (c)
						{
							case ' ':
							case '\t':
							case '\n':
							case '\r': // we'll reconsume!
							case '\u000C':
							case '<':
							case '&':
								EmitOrAppendStrBuf(returnState);
								//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
								{
									cstart = pos;
								}
								//state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
								reconsume = true;
								goto continueStateloop;
							case '#':
								/*
								 * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
								 * SIGN.
								 */
								AppendStrBuf('#');
								//state = Transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
						        state = TokenizerState.CONSUME_NCR;
								goto continueStateloop;
							default:
								if (c == additional)
								{
									EmitOrAppendStrBuf(returnState);
									//state = Transition(state, returnState, reconsume, pos);
									state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								if (c >= 'a' && c <= 'z')
								{
									firstCharKey = c - 'a' + 26;
								}
								else if (c >= 'A' && c <= 'Z')
								{
									firstCharKey = c - 'A';
								}
								else
								{
									// No match
									/*
									 * If no match can be made, then this is a parse
									 * error.
									 */
									ErrNoNamedCharacterMatch();
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
								    if (((byte)returnState & DATA_AND_RCDATA_MASK) !=0)
									{
										cstart = pos;
									}
									//state = Transition(state, returnState, reconsume, pos);
									state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								// Didn't fail yet
								AppendStrBuf(c);
								//state = Transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
						        state = TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;

								// FALL THROUGH goto continueStateloop;
								break;
						}
						goto case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP:
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (c == '\u0000')
							{
								goto breakStateloop;
							}
							/*
							 * The data structure is as follows:
							 * 
							 * HILO_ACCEL is a two-dimensional int array whose major
							 * index corresponds to the second character of the
							 * character reference (code point as index) and the
							 * minor index corresponds to the first character of the
							 * character reference (packed so that A-Z runs from 0
							 * to 25 and a-z runs from 26 to 51). This layout makes
							 * it easier to use the sparseness of the data structure
							 * to omit parts of it: The second dimension of the
							 * table is null when no character reference starts with
							 * the character corresponding to that row.
							 * 
							 * The int value HILO_ACCEL (by these indices) is zero
							 * if there exists no character reference starting with
							 * that two-letter prefix. Otherwise, the value is an
							 * int that packs two shorts so that the higher short is
							 * the index of the highest character reference name
							 * with that prefix in NAMES and the lower short
							 * corresponds to the index of the lowest character
							 * reference name with that prefix. (It happens that the
							 * first two character reference names share their
							 * prefix so the packed int cannot be 0 by packing the
							 * two shorts.)
							 * 
							 * NAMES is an array of byte arrays where each byte
							 * array encodes the name of a character reference as
							 * ASCII. The names omit the first two letters of the
							 * name. (Since storing the first two letters would be
							 * redundant with the data contained in HILO_ACCEL.) The
							 * entries are lexically sorted.
							 * 
							 * For a given index in NAMES, the same index in VALUES
							 * contains the corresponding expansion as an array of
							 * two UTF-16 code units (either the character and
							 * U+0000 or a surrogate pair).
							 */
							int hilo = 0;
							if (c <= 'z')
							{
								int[] row = NamedCharactersAccel.HILO_ACCEL[c];
								if (row != null)
								{
									hilo = row[firstCharKey];
								}
							}
							if (hilo == 0)
							{
								/*
								 * If no match can be made, then this is a parse
								 * error.
								 */
								ErrNoNamedCharacterMatch();
								EmitOrAppendStrBuf(returnState);
								//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
								{
									cstart = pos;
								}
								//state = Transition(state, returnState, reconsume, pos);
								state = returnState;
								reconsume = true;
								goto continueStateloop;
							}
							// Didn't fail yet
							AppendStrBuf(c);
							lo = hilo & 0xFFFF;
							hi = hilo >> 16;
							entCol = -1;
							candidate = -1;
							strBufMark = 0;
							//state = Transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
						    state = TokenizerState.CHARACTER_REFERENCE_TAIL;
							// FALL THROUGH goto continueStateloop;
							goto case TokenizerState.CHARACTER_REFERENCE_TAIL;
						}
					case TokenizerState.CHARACTER_REFERENCE_TAIL:
						/*outer:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (c == '\u0000')
							{
								goto breakStateloop;
							}
							entCol++;
							/*
							 * Consume the maximum number of characters possible,
							 * with the consumed characters matching one of the
							 * identifiers in the first column of the named
							 * character references table (in a case-sensitive
							 * manner).
							 */
							/*loloop:*/
							for (; ; )
							{
								if (hi < lo)
								{
									goto breakOuter;
								}
								if (entCol == NamedCharacters.NAMES[lo].Length)
								{
									candidate = lo;
									strBufMark = strBufLen;
									lo++;
								}
								else if (entCol > NamedCharacters.NAMES[lo].Length)
								{
									goto breakOuter;
								}
								else if (c > NamedCharacters.NAMES[lo][entCol])
								{
									lo++;
								}
								else
								{
									goto breakLoloop;
								}
							}

						breakLoloop:

							/*hiloop:*/
							for (; ; )
							{
								if (hi < lo)
								{
									goto breakOuter;
								}
								if (entCol == NamedCharacters.NAMES[hi].Length)
								{
									goto breakHiloop;
								}
								if (entCol > NamedCharacters.NAMES[hi].Length)
								{
									goto breakOuter;
								}
								else if (c < NamedCharacters.NAMES[hi][entCol])
								{
									hi--;
								}
								else
								{
									goto breakHiloop;
								}
							}

						breakHiloop:

							if (hi < lo)
							{
								goto breakOuter;
							}
							AppendStrBuf(c);
							continue;
						}

					breakOuter:

						if (candidate == -1)
						{
							// reconsume deals with CR, LF or nul
							/*
							 * If no match can be made, then this is a parse error.
							 */
							ErrNoNamedCharacterMatch();
							EmitOrAppendStrBuf(returnState);
							//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                            if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
							{
								cstart = pos;
							}
							//state = Transition(state, returnState, reconsume, pos);
							state = returnState;
							reconsume = true;
							goto continueStateloop;
						}
						else
						{
							// c can't be CR, LF or nul if we got here
							string candidateName = NamedCharacters.NAMES[candidate];
							if (candidateName.Length == 0
									|| candidateName[candidateName.Length - 1] != ';')
							{
								/*
								 * If the last character matched is not a U+003B
								 * SEMICOLON (;), there is a parse error.
								 */
								//if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
								{
									/*
									 * If the entity is being consumed as part of an
									 * attribute, and the last character matched is
									 * not a U+003B SEMICOLON (;),
									 */
									char ch;
									if (strBufMark == strBufLen)
									{
										ch = c;
									}
									else
									{
										// if (strBufOffset != -1) {
										// ch = buf[strBufOffset + strBufMark];
										// } else {
										ch = strBuf[strBufMark];
										// }
									}
									if (ch == '=' || (ch >= '0' && ch <= '9')
											|| (ch >= 'A' && ch <= 'Z')
											|| (ch >= 'a' && ch <= 'z'))
									{
										/*
										 * and the next character is either a U+003D
										 * EQUALS SIGN character (=) or in the range
										 * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
										 * U+0041 LATIN CAPITAL LETTER A to U+005A
										 * LATIN CAPITAL LETTER Z, or U+0061 LATIN
										 * SMALL LETTER A to U+007A LATIN SMALL
										 * LETTER Z, then, for historical reasons,
										 * all the characters that were matched
										 * after the U+0026 AMPERSAND (&) must be
										 * unconsumed, and nothing is returned.
										 */
										ErrNoNamedCharacterMatch();
										AppendStrBufToLongStrBuf();
										//state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
										reconsume = true;
										goto continueStateloop;
									}
								}
								//if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
								{
									ErrUnescapedAmpersandInterpretedAsCharacterReference();
								}
								else
								{
									ErrNotSemicolonTerminated();
								}
							}

							/*
							 * Otherwise, return a character token for the character
							 * corresponding to the entity name (as given by the
							 * second column of the named character references
							 * table).
							 */

							char[] val = NamedCharacters.VALUES[candidate];
							if (
								// [NOCPP[
							val.Length == 1
								// ]NOCPP]
								// CPPONLY: val[1] == 0
							)
							{
								EmitOrAppendOne(val, returnState);
							}
							else
							{
								EmitOrAppendTwo(val, returnState);
							}
							// this is so complicated!
							if (strBufMark < strBufLen)
							{
								// if (strBufOffset != -1) {
								// if ((returnState & (~1)) != 0) {
								// for (int i = strBufMark; i < strBufLen; i++) {
								// appendLongStrBuf(buf[strBufOffset + i]);
								// }
								// } else {
								// tokenHandler.Characters(buf, strBufOffset
								// + strBufMark, strBufLen
								// - strBufMark);
								// }
								// } else {
								//if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
								{
									for (int i = strBufMark; i < strBufLen; i++)
									{
										AppendLongStrBuf(strBuf[i]);
									}
								}
								else
								{
									TokenHandler.Characters(strBuf, strBufMark,
											strBufLen - strBufMark);
								}
								// }
							}
							//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                            if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
							{
								cstart = pos;
							}
							//state = Transition(state, returnState, reconsume, pos);
                            state = returnState;
							reconsume = true;
							goto continueStateloop;
							/*
							 * If the markup contains I'm &notit; I tell you, the
							 * entity is parsed as "not", as in, I'm ¬it; I tell
							 * you. But if the markup was I'm &notin; I tell you,
							 * the entity would be parsed as "notin;", resulting in
							 * I'm ∉ I tell you.
							 */
						}
					// XXX reorder point
					case TokenizerState.CONSUME_NCR:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
						c = buf[pos];
						prevValue = -1;
						value = 0;
						seenDigits = false;
						/*
						 * The behavior further depends on the character after the
						 * U+0023 NUMBER SIGN:
						 */
						switch (c)
						{
							case 'x':
							case 'X':

								/*
								 * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
								 * LETTER X Consume the X.
								 * 
								 * Follow the steps below, but using the range of
								 * characters U+0030 DIGIT ZERO through to U+0039
								 * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
								 * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
								 * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
								 * LETTER F (in other words, 0-9, A-F, a-f).
								 * 
								 * When it comes to interpreting the number,
								 * interpret it as a hexadecimal number.
								 */
								AppendStrBuf(c);
								//state = Transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
						        state = TokenizerState.HEX_NCR_LOOP;

								goto continueStateloop;
							default:
								/*
								 * Anything else Follow the steps below, but using
								 * the range of characters U+0030 DIGIT ZERO through
								 * to U+0039 DIGIT NINE (i.e. just 0-9).
								 * 
								 * When it comes to interpreting the number,
								 * interpret it as a decimal number.
								 */
								//state = Transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
						        state = TokenizerState.DECIMAL_NRC_LOOP;
								reconsume = true;
								// FALL THROUGH goto continueStateloop;
								break;
						}
						// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
						goto case TokenizerState.DECIMAL_NRC_LOOP;
					case TokenizerState.DECIMAL_NRC_LOOP:
						/*decimalloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							// Deal with overflow gracefully
							if (value < prevValue)
							{
								value = 0x110000; // Value above Unicode range but
								// within int
								// range
							}
							prevValue = value;
							/*
							 * Consume as many characters as match the range of
							 * characters given above.
							 */
							if (c >= '0' && c <= '9')
							{
								seenDigits = true;
								value *= 10;
								value += c - '0';
								continue;
							}
							else if (c == ';')
							{
								if (seenDigits)
								{
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos + 1;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
								    state = TokenizerState.HANDLE_NCR_VALUE;

									// FALL THROUGH goto continueStateloop;
									goto breakDecimalloop;
								}
								else
								{
									ErrNoDigitsInNCR();
									AppendStrBuf(';');
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                    {
										cstart = pos + 1;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;

									goto continueStateloop;
								}
							}
							else
							{
								/*
								 * If no characters match the range, then don't
								 * consume any characters (and unconsume the U+0023
								 * NUMBER SIGN character and, if appropriate, the X
								 * character). This is a parse error; nothing is
								 * returned.
								 * 
								 * Otherwise, if the next character is a U+003B
								 * SEMICOLON, consume that too. If it isn't, there
								 * is a parse error.
								 */
								if (!seenDigits)
								{
									ErrNoDigitsInNCR();
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								else
								{
									ErrCharRefLacksSemicolon();
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                    {
										cstart = pos;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
								    state = TokenizerState.HANDLE_NCR_VALUE;
									reconsume = true;
									// FALL THROUGH goto continueStateloop;
									goto breakDecimalloop;
								}
							}
						}
					breakDecimalloop:
						goto case TokenizerState.HANDLE_NCR_VALUE;
					// WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
					case TokenizerState.HANDLE_NCR_VALUE:
						// WARNING previous state sets reconsume
						// XXX inline this case if the method size can take it
						HandleNcrValue(returnState);
						//state = Transition(state, returnState, reconsume, pos);
						state = returnState;

						goto continueStateloop;
					// XXX reorder point
					case TokenizerState.HEX_NCR_LOOP:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							// Deal with overflow gracefully
							if (value < prevValue)
							{
								value = 0x110000; // Value above Unicode range but
								// within int
								// range
							}
							prevValue = value;
							/*
							 * Consume as many characters as match the range of
							 * characters given above.
							 */
							if (c >= '0' && c <= '9')
							{
								seenDigits = true;
								value *= 16;
								value += c - '0';
								continue;
							}
							else if (c >= 'A' && c <= 'F')
							{
								seenDigits = true;
								value *= 16;
								value += c - 'A' + 10;
								continue;
							}
							else if (c >= 'a' && c <= 'f')
							{
								seenDigits = true;
								value *= 16;
								value += c - 'a' + 10;
								continue;
							}
							else if (c == ';')
							{
								if (seenDigits)
								{
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos + 1;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
								    state = TokenizerState.HANDLE_NCR_VALUE;
									goto continueStateloop;
								}
								else
								{
									ErrNoDigitsInNCR();
									AppendStrBuf(';');
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos + 1;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
									goto continueStateloop;
								}
							}
							else
							{
								/*
								 * If no characters match the range, then don't
								 * consume any characters (and unconsume the U+0023
								 * NUMBER SIGN character and, if appropriate, the X
								 * character). This is a parse error; nothing is
								 * returned.
								 * 
								 * Otherwise, if the next character is a U+003B
								 * SEMICOLON, consume that too. If it isn't, there
								 * is a parse error.
								 */
								if (!seenDigits)
								{
									ErrNoDigitsInNCR();
									EmitOrAppendStrBuf(returnState);
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos;
									}
									//state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								else
								{
									ErrCharRefLacksSemicolon();
									//if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
									{
										cstart = pos;
									}
									//state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                    state = TokenizerState.HANDLE_NCR_VALUE;
									reconsume = true;
									goto continueStateloop;
								}
							}
						}
					// XXX reorder point
					case TokenizerState.PLAINTEXT:
						/*plaintextloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '\u0000':
									EmitPlaintextReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * PLAINTEXT state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.CLOSE_TAG_OPEN:
						if (++pos == endPos)
						{
							goto breakStateloop;
						}
						c = buf[pos];
						/*
						 * Otherwise, if the content model flag is set to the PCDATA
						 * state, or if the next few characters do match that tag
						 * name, consume the next input character:
						 */
						switch (c)
						{
							case '>':
								/* U+003E GREATER-THAN SIGN (>) Parse error. */
								ErrLtSlashGt();
								/*
								 * Switch to the data state.
								 */
								cstart = pos + 1;
								//state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                state = TokenizerState.DATA;
								goto continueStateloop;
							case '\r':
								SilentCarriageReturn();
								/* Anything else Parse error. */
								ErrGarbageAfterLtSlash();
								/*
								 * Switch to the bogus comment state.
								 */
								ClearLongStrBufAndAppend('\n');
								//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                state = TokenizerState.BOGUS_COMMENT;
								goto breakStateloop;
							case '\n':
								SilentLineFeed();
								/* Anything else Parse error. */
								ErrGarbageAfterLtSlash();
								/*
								 * Switch to the bogus comment state.
								 */
								ClearLongStrBufAndAppend('\n');
								//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                state = TokenizerState.BOGUS_COMMENT;
								goto continueStateloop;
							case '\u0000':
								c = '\uFFFD';
								// fall thru
								goto default;
							default:
								if (c >= 'A' && c <= 'Z')
								{
									c += (char)0x20;
								}
								if (c >= 'a' && c <= 'z')
								{
									/*
									 * U+0061 LATIN SMALL LETTER A through to U+007A
									 * LATIN SMALL LETTER Z Create a new end tag
									 * token,
									 */
									endTag = true;
									/*
									 * set its tag name to the input character,
									 */
									ClearStrBufAndAppend(c);
									/*
									 * then switch to the tag name state. (Don't
									 * emit the token yet; further details will be
									 * filled in before it is emitted.)
									 */
									//state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
									state = TokenizerState.TAG_NAME;
									goto continueStateloop;
								}
								else
								{
									/* Anything else Parse error. */
									ErrGarbageAfterLtSlash();
									/*
									 * Switch to the bogus comment state.
									 */
									ClearLongStrBufAndAppend(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
								    state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
								}
						}
					// XXX reorder point
					case TokenizerState.RCDATA:
						/*rcdataloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '&':
									/*
									 * U+0026 AMPERSAND (&) Switch to the character
									 * reference in RCDATA state.
									 */
									FlushChars(buf, pos);
									ClearStrBufAndAppend(c);
									additional = '\u0000';
									returnState = state;
									//state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
							        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
									goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * RCDATA less-than sign state.
									 */
									FlushChars(buf, pos);

									returnState = state;
									//state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Emit the current input character as a
									 * character token. Stay in the RCDATA state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.RAWTEXT:
						/*rawtextloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * RAWTEXT less-than sign state.
									 */
									FlushChars(buf, pos);

									returnState = state;
									//state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN;
									goto breakRawtextloop;
								// FALL THRU goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Emit the current input character as a
									 * character token. Stay in the RAWTEXT state.
									 */
									continue;
							}
						}
					breakRawtextloop:
						goto case TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN;
					// XXX fallthru don't reorder
					case TokenizerState.RAWTEXT_RCDATA_LESS_THAN_SIGN:
						/*rawtextrcdatalessthansignloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Set the temporary buffer
									 * to the empty string. Switch to the non-data
									 * end tag name state.
									 */
									index = 0;
									ClearStrBuf();
									//state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
							        state = TokenizerState.NON_DATA_END_TAG_NAME;
									goto breakRawtextrcdatalessthansignloop;
								// FALL THRU goto continueStateloop;
								default:
									/*
									 * Otherwise, emit a U+003C LESS-THAN SIGN
									 * character token
									 */
									TokenHandler.Characters(Tokenizer.LT_GT, 0, 1);
									/*
									 * and reconsume the current input character in
									 * the data state.
									 */
									cstart = pos;
									//state = Transition(state, returnState, reconsume, pos);
									state = returnState;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakRawtextrcdatalessthansignloop:
						goto case TokenizerState.NON_DATA_END_TAG_NAME;
					// XXX fall thru. don't reorder.
					case TokenizerState.NON_DATA_END_TAG_NAME:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * ASSERT! when entering this state, set index to 0 and
							 * call clearStrBuf() assert (contentModelElement !=
							 * null); Let's implement the above without lookahead.
							 * strBuf is the 'temporary buffer'.
							 */
							if (index < endTagExpectationAsArray.Length)
							{
								char e = endTagExpectationAsArray[index];
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != e)
								{
									// [NOCPP[
									ErrHtml4LtSlashInRcdata(folded);
									// ]NOCPP]
									TokenHandler.Characters(Tokenizer.LT_SOLIDUS,
											0, 2);
									EmitStrBuf();
									cstart = pos;
									//state = Transition(state, returnState, reconsume, pos);
								    state = returnState;
									reconsume = true;
									goto continueStateloop;
								}
								AppendStrBuf(c);
								index++;
								continue;
							}
							else
							{
								endTag = true;
								// XXX replace contentModelElement with different
								// type
								tagName = endTagExpectation;
								switch (c)
								{
									case '\r':
										SilentCarriageReturn();
										//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
								        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;

										goto breakStateloop;
									case '\n':
										SilentLineFeed();
										goto case ' ';
									// fall thru
									case ' ':
									case '\t':
									case '\u000C':
										/*
										 * U+0009 CHARACTER TABULATION U+000A LINE
										 * FEED (LF) U+000C FORM FEED (FF) U+0020
										 * SPACE If the current end tag token is an
										 * appropriate end tag token, then switch to
										 * the before attribute name state.
										 */
										//state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
								        state = TokenizerState.BEFORE_ATTRIBUTE_NAME;
										goto continueStateloop;
									case '/':
										/*
										 * U+002F SOLIDUS (/) If the current end tag
										 * token is an appropriate end tag token,
										 * then switch to the self-closing start tag
										 * state.
										 */
										//state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
								        state = TokenizerState.SELF_CLOSING_START_TAG;
										goto continueStateloop;
									case '>':
										/*
										 * U+003E GREATER-THAN SIGN (>) If the
										 * current end tag token is an appropriate
										 * end tag token, then emit the current tag
										 * token and switch to the data state.
										 */
										//state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
								        state = EmitCurrentTagToken(false, pos);
										if (shouldSuspend)
										{
											goto breakStateloop;
										}
										goto continueStateloop;
									default:
										/*
										 * Emit a U+003C LESS-THAN SIGN character
										 * token, a U+002F SOLIDUS character token,
										 * a character token for each of the
										 * characters in the temporary buffer (in
										 * the order they were added to the buffer),
										 * and reconsume the current input character
										 * in the RAWTEXT state.
										 */
										// [NOCPP[
										ErrWarnLtSlashInRcdata();
										// ]NOCPP]
										TokenHandler.Characters(LT_SOLIDUS, 0, 2);
										EmitStrBuf();
										if (c == '\u0000')
										{
											EmitReplacementCharacter(buf, pos);
										}
										else
										{
											cstart = pos; // don't drop the
											// character
										}
										//state = Transition(state, returnState, reconsume, pos);
								        state = returnState;
										goto continueStateloop;
								}
							}
						}
					// XXX reorder point
					// BEGIN HOTSPOT WORKAROUND
					case TokenizerState.BOGUS_COMMENT:
						/*boguscommentloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume every character up to and including the first
							 * U+003E GREATER-THAN SIGN character (>) or the end of
							 * the file (EOF), whichever comes first. Emit a comment
							 * token whose data is the concatenation of all the
							 * characters starting from and including the character
							 * that caused the state machine to switch into the
							 * bogus comment state, up to and including the
							 * character immediately before the last consumed
							 * character (i.e. up to the character just before the
							 * U+003E or EOF character). (If the comment was started
							 * by the end of the file (EOF), the token is empty.)
							 * 
							 * Switch to the data state.
							 * 
							 * If the end of the file was reached, reconsume the EOF
							 * character.
							 */
							switch (c)
							{
								case '>':
									EmitComment(0, pos);
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT_HYPHEN;
									goto breakBoguscommentloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									AppendLongStrBuf(c);
									continue;
							}
						}
					breakBoguscommentloop:
						goto case TokenizerState.BOGUS_COMMENT_HYPHEN;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BOGUS_COMMENT_HYPHEN:
						// Bogus-comment-hyphen state. One character is read per iteration:
						//   '>'    -> MaybeAppendSpaceToBogusComment, EmitComment, switch to DATA
						//   '-'    -> AppendSecondHyphenToBogusComment, stay in this state
						//   CR     -> append it, switch to BOGUS_COMMENT, jump to breakStateloop
						//   LF     -> append it, switch to BOGUS_COMMENT
						//   U+0000 -> replaced by U+FFFD, then treated like any other character
						//   other  -> AppendLongStrBuf(c), switch to BOGUS_COMMENT
						// breakStateloop / continueStateloop are labels defined outside this case.
						/*boguscommenthyphenloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								// pos reached endPos: nothing further to read in this pass.
								goto breakStateloop;
							}
							c = buf[pos];
							switch (c)
							{
								case '>':
									// [NOCPP[
									MaybeAppendSpaceToBogusComment();
									// ]NOCPP]
									EmitComment(0, pos);
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '-':
									AppendSecondHyphenToBogusComment();
									goto continueBoguscommenthyphenloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									AppendLongStrBuf(c);
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
							        state = TokenizerState.BOGUS_COMMENT;
									goto continueStateloop;
							}
						// Target of the '-' arm: simply starts the next iteration of this loop.
						continueBoguscommenthyphenloop:
							continue;
						}

					// XXX reorder point
					case TokenizerState.SCRIPT_DATA:
						// Script data state. If reconsume is set on entry, the character already
						// held in c is processed again instead of reading buf[++pos].
						//   '<'    -> FlushChars, remember this state in returnState,
						//             go to SCRIPT_DATA_LESS_THAN_SIGN
						//   U+0000 -> EmitReplacementCharacter, stay here
						//   CR     -> EmitCarriageReturn, jump to breakStateloop
						//   LF     -> SilentLineFeed, stay here
						//   other  -> stay here; no per-character call is made
						/*scriptdataloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							switch (c)
							{
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data less-than sign state.
									 */
									FlushChars(buf, pos);
									returnState = state;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_LESS_THAN_SIGN;
									goto breakScriptdataloop; // FALL THRU continue
								// stateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * script data state.
									 */
									continue;
							}
						}
					breakScriptdataloop:
						goto case TokenizerState.SCRIPT_DATA_LESS_THAN_SIGN;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_LESS_THAN_SIGN:
						// Script data less-than sign state: decides what a '<' seen in script
						// data introduces. Every arm of the switch leaves this loop.
						//   '/'   -> reset index, ClearStrBuf, go to NON_DATA_END_TAG_NAME
						//   '!'   -> report "<" via TokenHandler.Characters, set cstart = pos,
						//            go to SCRIPT_DATA_ESCAPE_START
						//   other -> report "<", set cstart = pos, reconsume c in SCRIPT_DATA
						/*scriptdatalessthansignloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Set the temporary buffer
									 * to the empty string. Switch to the script
									 * data end tag open state.
									 *
									 * (The code below switches to
									 * NON_DATA_END_TAG_NAME rather than a separate
									 * end tag open state.)
									 */
									index = 0;
									ClearStrBuf();
									//state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
							        state = TokenizerState.NON_DATA_END_TAG_NAME;
									goto continueStateloop;
								case '!':
									// Characters(LT_GT, 0, 1) passes one character starting at offset 0
									// of LT_GT; presumably that is '<' — confirm against LT_GT.
									TokenHandler.Characters(LT_GT, 0, 1);
									cstart = pos;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPE_START;

									goto breakScriptdatalessthansignloop; // FALL THRU
								// continue
								// stateloop;
								default:
									/*
									 * Otherwise, emit a U+003C LESS-THAN SIGN
									 * character token
									 */
									TokenHandler.Characters(LT_GT, 0, 1);
									/*
									 * and reconsume the current input character in
									 * the script data state.
									 */
									cstart = pos;
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakScriptdatalessthansignloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPE_START;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPE_START:
						// Script data escape start state (reached after "<!" in script data).
						//   '-'   -> go to SCRIPT_DATA_ESCAPE_START_DASH
						//   other -> reconsume c in SCRIPT_DATA
						// Both arms leave this loop, so at most one character is read per entry.
						/*scriptdataescapestartloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escape start dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPE_START_DASH;
									goto breakScriptdataescapestartloop; // FALL THRU
								// continue
								// stateloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakScriptdataescapestartloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPE_START_DASH;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPE_START_DASH:
						// Script data escape start dash state (reached after "<!-").
						//   '-'   -> go to SCRIPT_DATA_ESCAPED_DASH_DASH
						//   other -> reconsume c in SCRIPT_DATA
						// Both arms leave this loop, so at most one character is read per entry.
						/*scriptdataescapestartdashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escaped dash dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH;
									goto breakScriptdataescapestartdashloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									reconsume = true;
									goto continueStateloop;
							}
						}
					breakScriptdataescapestartdashloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH:
						// Script data escaped dash dash state (two or more '-' seen in a row).
						//   '-'    -> stay here
						//   '<'    -> FlushChars, go to SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
						//   '>'    -> go to SCRIPT_DATA
						//   U+0000 -> EmitReplacementCharacter, go to SCRIPT_DATA_ESCAPED
						//   CR     -> EmitCarriageReturn, set SCRIPT_DATA_ESCAPED, jump to breakStateloop
						//   LF     -> SilentLineFeed, go to SCRIPT_DATA_ESCAPED
						//   other  -> go to SCRIPT_DATA_ESCAPED
						/*scriptdataescapeddashdashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Stay in the
									 * script data escaped dash dash state.
									 */
									continue;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data escaped less-than sign state.
									 */
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
									 * GREATER-THAN SIGN character token. Switch to
									 * the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakScriptdataescapeddashdashloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakScriptdataescapeddashdashloop;
								// goto continueStateloop;
							}
						}
					breakScriptdataescapeddashdashloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED:
						// Script data escaped state. If reconsume is set on entry, the character
						// already held in c is processed again instead of reading buf[++pos].
						//   '-'    -> go to SCRIPT_DATA_ESCAPED_DASH
						//   '<'    -> FlushChars, go to SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
						//   U+0000 -> EmitReplacementCharacter, stay here
						//   CR     -> EmitCarriageReturn, jump to breakStateloop
						//   LF     -> SilentLineFeed, stay here
						//   other  -> stay here; no per-character call is made
						/*scriptdataescapedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escaped dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_DASH;
									goto breakScriptdataescapedloop; // FALL THRU
								// continue
								// stateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data escaped less-than sign state.
									 */
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * script data escaped state.
									 */
									continue;
							}
						}
					breakScriptdataescapedloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED_DASH;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED_DASH:
						// Script data escaped dash state (a single '-' seen in escaped script data).
						// Every arm of the switch leaves this loop.
						//   '-'    -> go to SCRIPT_DATA_ESCAPED_DASH_DASH
						//   '<'    -> FlushChars, go to SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
						//   U+0000 -> EmitReplacementCharacter, go to SCRIPT_DATA_ESCAPED
						//   CR     -> EmitCarriageReturn, set SCRIPT_DATA_ESCAPED, jump to breakStateloop
						//   LF     -> SilentLineFeed, go to SCRIPT_DATA_ESCAPED
						//   other  -> go to SCRIPT_DATA_ESCAPED
						/*scriptdataescapeddashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data escaped dash dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_DASH_DASH;
									goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Switch to the
									 * script data escaped less-than sign state.
									 */
									FlushChars(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
									goto breakScriptdataescapeddashloop;
								// goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdataescapeddashloop:
						goto case TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
						// Script data escaped less-than sign state: decides what a '<' seen in
						// escaped script data introduces. Every arm of the switch leaves this loop.
						//   '/'       -> reset index, ClearStrBuf, set returnState to
						//                SCRIPT_DATA_ESCAPED, go to NON_DATA_END_TAG_NAME
						//   'S' / 's' -> report "<" via TokenHandler.Characters, set cstart = pos
						//                and index = 1, go to SCRIPT_DATA_DOUBLE_ESCAPE_START
						//   other     -> report "<", set cstart = pos, reconsume c in
						//                SCRIPT_DATA_ESCAPED
						/*scriptdataescapedlessthanloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Set the temporary buffer
									 * to the empty string. Switch to the script
									 * data escaped end tag open state.
									 *
									 * (The code below switches to
									 * NON_DATA_END_TAG_NAME and records
									 * SCRIPT_DATA_ESCAPED in returnState.)
									 */
									index = 0;
									ClearStrBuf();
									returnState = TokenizerState.SCRIPT_DATA_ESCAPED;
                                    
									//state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
							        state = TokenizerState.NON_DATA_END_TAG_NAME;
									goto continueStateloop;
								case 'S':
								case 's':
									/*
									 * U+0041 LATIN CAPITAL LETTER A through to
									 * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
									 * LESS-THAN SIGN character token and the
									 * current input character as a character token.
									 *
									 * (Only 'S' and 's' are matched here; any other
									 * letter takes the default arm below.)
									 */
									TokenHandler.Characters(LT_GT, 0, 1);
									cstart = pos;
									// index = 1: the next state compares following characters against
									// Tokenizer.SCRIPT_ARR starting at position 1.
									index = 1;
									/*
									 * Set the temporary buffer to the empty string.
									 * Append the lowercase version of the current
									 * input character (add 0x0020 to the
									 * character's code point) to the temporary
									 * buffer. Switch to the script data double
									 * escape start state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_START;
									goto breakScriptdataescapedlessthanloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Emit a U+003C LESS-THAN SIGN
									 * character token and reconsume the current
									 * input character in the script data escaped
									 * state.
									 */
									TokenHandler.Characters(LT_GT, 0, 1);
									cstart = pos;
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdataescapedlessthanloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_START;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_START:
						// Script data double escape start state. index is the position in
						// Tokenizer.SCRIPT_ARR to compare next (asserted > 0 on every iteration).
						// While index < 6, c is folded from 'A'-'Z' to lower case and compared
						// with SCRIPT_ARR[index]:
						//   mismatch -> reconsume c in SCRIPT_DATA_ESCAPED
						//   match    -> index++, read the next character
						// Once index has reached 6:
						//   CR                          -> EmitCarriageReturn, set
						//                                  SCRIPT_DATA_DOUBLE_ESCAPED, jump to breakStateloop
						//   LF (after SilentLineFeed),
						//   ' ', TAB, FF, '/', '>'      -> go to SCRIPT_DATA_DOUBLE_ESCAPED
						//   other                       -> reconsume c in SCRIPT_DATA_ESCAPED
						/*scriptdatadoubleescapestartloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							Debug.Assert(index > 0);
							if (index < 6)
							{ // SCRIPT_ARR.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									// ASCII upper case -> lower case.
									folded += (char)0x20;
								}
								if (folded != Tokenizer.SCRIPT_ARR[index])
								{
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
								    state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							switch (c)
							{
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
								case '/':
								case '>':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
									 * (>) Emit the current input character as a
									 * character token. If the temporary buffer is
									 * the string "script", then switch to the
									 * script data double escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakScriptdatadoubleescapestartloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data escaped state.
									 */
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapestartloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED:
						// Script data double escaped state. If reconsume is set on entry, the
						// character already held in c is processed again instead of reading
						// buf[++pos].
						//   '-'    -> go to SCRIPT_DATA_DOUBLE_ESCAPED_DASH
						//   '<'    -> go to SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
						//             (FlushChars is not called on this path)
						//   U+0000 -> EmitReplacementCharacter, stay here
						//   CR     -> EmitCarriageReturn, jump to breakStateloop
						//   LF     -> SilentLineFeed, stay here
						//   other  -> stay here; no per-character call is made
						/*scriptdatadoubleescapedloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data double escaped dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
									goto breakScriptdatadoubleescapedloop; // FALL THRU
								// continue
								// stateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Emit a U+003C
									 * LESS-THAN SIGN character token. Switch to the
									 * script data double escaped less-than sign
									 * state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									continue;
								case '\r':
									EmitCarriageReturn(buf, pos);
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Stay in the
									 * script data double escaped state.
									 */
									continue;
							}
						}
					breakScriptdatadoubleescapedloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
						// Script data double escaped dash state (a single '-' seen).
						// Every arm of the switch leaves this loop.
						//   '-'    -> go to SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
						//   '<'    -> go to SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
						//   U+0000 -> EmitReplacementCharacter, go to SCRIPT_DATA_DOUBLE_ESCAPED
						//   CR     -> EmitCarriageReturn, set SCRIPT_DATA_DOUBLE_ESCAPED,
						//             jump to breakStateloop
						//   LF     -> SilentLineFeed, go to SCRIPT_DATA_DOUBLE_ESCAPED
						//   other  -> go to SCRIPT_DATA_DOUBLE_ESCAPED
						/*scriptdatadoubleescapeddashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Switch to the
									 * script data double escaped dash dash state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
									goto breakScriptdatadoubleescapeddashloop;
								// goto continueStateloop;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Emit a U+003C
									 * LESS-THAN SIGN character token. Switch to the
									 * script data double escaped less-than sign
									 * state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data double escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapeddashloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
						// Script data double escaped dash dash state (two or more '-' in a row).
						//   '-'    -> stay here
						//   '<'    -> go to SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
						//   '>'    -> go to SCRIPT_DATA
						//   U+0000 -> EmitReplacementCharacter, go to SCRIPT_DATA_DOUBLE_ESCAPED
						//   CR     -> EmitCarriageReturn, set SCRIPT_DATA_DOUBLE_ESCAPED,
						//             jump to breakStateloop
						//   LF     -> SilentLineFeed, go to SCRIPT_DATA_DOUBLE_ESCAPED
						//   other  -> go to SCRIPT_DATA_DOUBLE_ESCAPED
						/*scriptdatadoubleescapeddashdashloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '-':
									/*
									 * U+002D HYPHEN-MINUS (-) Emit a U+002D
									 * HYPHEN-MINUS character token. Stay in the
									 * script data double escaped dash dash state.
									 */
									continue;
								case '<':
									/*
									 * U+003C LESS-THAN SIGN (<) Emit a U+003C
									 * LESS-THAN SIGN character token. Switch to the
									 * script data double escaped less-than sign
									 * state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
									goto breakScriptdatadoubleescapeddashdashloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
									 * GREATER-THAN SIGN character token. Switch to
									 * the script data state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA;
									goto continueStateloop;
								case '\u0000':
									EmitReplacementCharacter(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto default;
								default:
									/*
									 * Anything else Emit the current input
									 * character as a character token. Switch to the
									 * script data double escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapeddashdashloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
						// Script data double escaped less-than sign state.
						//   '/'   -> reset index to 0, go to SCRIPT_DATA_DOUBLE_ESCAPE_END
						//   other -> reconsume c in SCRIPT_DATA_DOUBLE_ESCAPED
						// Both arms leave this loop, so at most one character is read per entry.
						/*scriptdatadoubleescapedlessthanloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '/':
									/*
									 * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
									 * character token. Set the temporary buffer to
									 * the empty string. Switch to the script data
									 * double escape end state.
									 */
									// index = 0: the next state matches against Tokenizer.SCRIPT_ARR
									// from its first element.
									index = 0;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_END;
									goto breakScriptdatadoubleescapedlessthanloop;
								default:
									/*
									 * Anything else Reconsume the current input
									 * character in the script data double escaped
									 * state.
									 */
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}
					breakScriptdatadoubleescapedlessthanloop:
						goto case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_END;
					// WARNING: the explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPE_END:
						// Script data double escape end state. index is the position in
						// Tokenizer.SCRIPT_ARR to compare next. While index < 6, c is folded from
						// 'A'-'Z' to lower case and compared with SCRIPT_ARR[index]:
						//   mismatch -> reconsume c in SCRIPT_DATA_DOUBLE_ESCAPED
						//   match    -> index++, read the next character
						// Once index has reached 6:
						//   CR                          -> EmitCarriageReturn, set
						//                                  SCRIPT_DATA_ESCAPED, jump to breakStateloop
						//   LF (after SilentLineFeed),
						//   ' ', TAB, FF, '/', '>'      -> go to SCRIPT_DATA_ESCAPED
						//   other                       -> reconsume c in SCRIPT_DATA_DOUBLE_ESCAPED
						/*scriptdatadoubleescapeendloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (index < 6)
							{ // SCRIPT_ARR.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									// ASCII upper case -> lower case.
									folded += (char)0x20;
								}
								if (folded != Tokenizer.SCRIPT_ARR[index])
								{
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
								    state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							switch (c)
							{
								case '\r':
									EmitCarriageReturn(buf, pos);
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
								case '/':
								case '>':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
									 * (>) Emit the current input character as a
									 * character token. If the temporary buffer is
									 * the string "script", then switch to the
									 * script data escaped state.
									 */
									//state = Transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_ESCAPED;
									goto continueStateloop;
								default:
									/*
									 * Reconsume the current input character in the
									 * script data double escaped state.
									 */
									reconsume = true;
									//state = Transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
							        state = TokenizerState.SCRIPT_DATA_DOUBLE_ESCAPED;
									goto continueStateloop;
							}
						}

					// XXX reorder point
					case TokenizerState.MARKUP_DECLARATION_OCTYPE:
						// Matches the characters of Tokenizer.OCTYPE case-insensitively, using
						// index as the position to compare next. While index < 6:
						//   match    -> append the original (unfolded) c via AppendLongStrBuf,
						//               index++, read the next character
						//   mismatch -> ErrBogusComment, reconsume c in BOGUS_COMMENT
						// Once index has reached 6, the character just read is reconsumed in
						// the DOCTYPE state.
						/*markupdeclarationdoctypeloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							if (index < 6)
							{ // OCTYPE.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									// ASCII upper case -> lower case.
									folded += (char)0x20;
								}
								if (folded == Tokenizer.OCTYPE[index])
								{
									AppendLongStrBuf(c);
								}
								else
								{
									ErrBogusComment();
									//state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
								    state = TokenizerState.BOGUS_COMMENT;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							else
							{
							    // state = Transition(state, Tokenizer.DOCTYPE, reconsume, pos);
							    state = TokenizerState.DOCTYPE;
								reconsume = true;
								goto breakMarkupdeclarationdoctypeloop;
								// goto continueStateloop;
							}
						}
					breakMarkupdeclarationdoctypeloop:
						goto case TokenizerState.DOCTYPE;
					// The explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.DOCTYPE:
						// DOCTYPE state. If reconsume is set on entry, the character already held
						// in c is processed again instead of reading buf[++pos].
						// InitDoctypeFields() runs before the character is examined; every arm of
						// the switch leaves this loop, so it runs once per character handled here.
						//   CR                        -> SilentCarriageReturn, set
						//                                BEFORE_DOCTYPE_NAME, jump to breakStateloop
						//   LF (after SilentLineFeed),
						//   ' ', TAB, FF              -> go to BEFORE_DOCTYPE_NAME
						//   other                     -> ErrMissingSpaceBeforeDoctypeName,
						//                                reconsume c in BEFORE_DOCTYPE_NAME
						/*doctypeloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							InitDoctypeFields();
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before DOCTYPE name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_NAME;
									goto breakDoctypeloop;
								// goto continueStateloop;
								default:
									/*
									 * Anything else Parse error.
									 */
									ErrMissingSpaceBeforeDoctypeName();
									/*
									 * Reconsume the current character in the before
									 * DOCTYPE name state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_NAME;
									reconsume = true;
									goto breakDoctypeloop;
								// goto continueStateloop;
							}
						}
					breakDoctypeloop:
						goto case TokenizerState.BEFORE_DOCTYPE_NAME;
					// The explicit goto above stands in for a case fall-through. DON'T REORDER
					case TokenizerState.BEFORE_DOCTYPE_NAME:
						/*beforedoctypenameloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before DOCTYPE name state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrNamelessDoctype();
									/*
									 * Create a new DOCTYPE token. Set its
									 * force-quirks flag to on.
									 */
									forceQuirks = true;
									/*
									 * Emit the token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									if (c >= 'A' && c <= 'Z')
									{
										/*
										 * U+0041 LATIN CAPITAL LETTER A through to
										 * U+005A LATIN CAPITAL LETTER Z Create a
										 * new DOCTYPE token. Set the token's name
										 * to the lowercase version of the input
										 * character (add 0x0020 to the character's
										 * code point).
										 */
										c += (char)0x20;
									}
									/* Anything else Create a new DOCTYPE token. */
									/*
									 * Set the token's name to the current
									 * input character.
									 */
									ClearStrBufAndAppend(c);
									/*
									 * Switch to the DOCTYPE name state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.DOCTYPE_NAME;
									goto breakBeforedoctypenameloop;
								// goto continueStateloop;
							}
						}
					breakBeforedoctypenameloop:
						goto case TokenizerState.DOCTYPE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_NAME:
						/*doctypenameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									StrBufToDoctypeName();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_NAME;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the after DOCTYPE name state.
									 */
									StrBufToDoctypeName();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_NAME;
									goto breakDoctypenameloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									StrBufToDoctypeName();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * U+0041 LATIN CAPITAL LETTER A through to
									 * U+005A LATIN CAPITAL LETTER Z Append the
									 * lowercase version of the input character (add
									 * 0x0020 to the character's code point) to the
									 * current DOCTYPE token's name.
									 */
									if (c >= 'A' && c <= 'Z')
									{
										c += (char)0x0020;
									}
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * name.
									 */
									AppendStrBuf(c);
									/*
									 * Stay in the DOCTYPE name state.
									 */
									continue;
							}
						}
					breakDoctypenameloop:
						goto case TokenizerState.AFTER_DOCTYPE_NAME;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_NAME:
						/*afterdoctypenameloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the after DOCTYPE name state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case 'p':
								case 'P':
									index = 0;
									//state = Transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
							        state = TokenizerState.DOCTYPE_UBLIC;

									goto breakAfterdoctypenameloop;
								// goto continueStateloop;
								case 's':
								case 'S':
									index = 0;
									//state = Transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
							        state = TokenizerState.DOCTYPE_YSTEM;
									goto continueStateloop;
								default:
									/*
									 * Otherwise, this is a parse error.
									 */
									BogusDoctype();

									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;

									goto continueStateloop;
							}
						}
					breakAfterdoctypenameloop:
						goto case TokenizerState.DOCTYPE_UBLIC;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_UBLIC:
						/*doctypeublicloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * If the six characters starting from the current input
							 * character are an ASCII case-insensitive match for the
							 * word "PUBLIC", then consume those characters and
							 * switch to the before DOCTYPE public identifier state.
							 */
							if (index < 5)
							{ // UBLIC.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != Tokenizer.UBLIC[index])
								{
									BogusDoctype();
									// forceQuirks = true;
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
								    state = TokenizerState.BOGUS_DOCTYPE;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								continue;
							}
							else
							{
								//state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
							    state = TokenizerState.AFTER_DOCTYPE_PUBLIC_KEYWORD;
								reconsume = true;
								goto breakDoctypeublicloop;
								// goto continueStateloop;
							}
						}
					breakDoctypeublicloop:
						goto case TokenizerState.AFTER_DOCTYPE_PUBLIC_KEYWORD;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_PUBLIC_KEYWORD:
						/*afterdoctypepublickeywordloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before DOCTYPE public
									 * identifier state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
									goto breakAfterdoctypepublickeywordloop;
								// FALL THROUGH continue stateloop
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Parse Error.
									 */
									ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's public identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Parse Error.
									 */
									ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's public identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
									/* U+003E GREATER-THAN SIGN (>) Parse error. */
									ErrExpectedPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakAfterdoctypepublickeywordloop:
						goto case TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
						/*beforedoctypepublicidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before DOCTYPE public identifier
									 * state.
									 */
									continue;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Set the DOCTYPE
									 * token's public identifier to the empty string
									 * (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
									goto breakBeforedoctypepublicidentifierloop;
								// goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
									 * public identifier to the empty string (not
									 * missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE public identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
									/* U+003E GREATER-THAN SIGN (>) Parse error. */
									ErrExpectedPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakBeforedoctypepublicidentifierloop:
						goto case TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
						/*doctypepublicidentifierdoublequotedloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the after
									 * DOCTYPE public identifier state.
									 */
									publicIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
									goto breakDoctypepublicidentifierdoublequotedloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrGtInPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									publicIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * public identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE public identifier
									 * (double-quoted) state.
									 */
									continue;
							}
						}
					breakDoctypepublicidentifierdoublequotedloop:
						goto case TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
						/*afterdoctypepublicidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
							        state = TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the between DOCTYPE public and
									 * system identifiers state.
									 */
									//state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
							        state = TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;

									goto breakAfterdoctypepublicidentifierloop;
								// goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Parse error.
									 */
									ErrNoSpaceBetweenPublicAndSystemIds();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Parse error.
									 */
									ErrNoSpaceBetweenPublicAndSystemIds();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakAfterdoctypepublicidentifierloop:
						goto case TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
						/*betweendoctypepublicandsystemidentifiersloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the between DOCTYPE public and system
									 * identifiers state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Set the DOCTYPE
									 * token's system identifier to the empty string
									 * (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto breakBetweendoctypepublicandsystemidentifiersloop;
								// goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
									 * system identifier to the empty string (not
									 * missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakBetweendoctypepublicandsystemidentifiersloop:
						goto case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
						/*doctypesystemidentifierdoublequotedloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Switch to the after
									 * DOCTYPE system identifier state.
									 */
									systemIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
									goto continueStateloop;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 */
									ErrGtInSystemId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									systemIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * system identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									continue;
							}
						}
                    // next 2 lines were unreachable; commented out
					//breakDoctypesystemidentifierdoublequotedloop:
					//	goto case TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
						/*afterdoctypesystemidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									goto case ' ';
								// fall thru
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the after DOCTYPE system identifier state.
									 */
									continue;
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit the current
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									/*
									 * Switch to the bogus DOCTYPE state. (This does
									 * not set the DOCTYPE token's force-quirks flag
									 * to on.)
									 */
									BogusDoctypeWithoutQuirks();
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto breakAfterdoctypesystemidentifierloop;
								// goto continueStateloop;
							}
						}
					breakAfterdoctypesystemidentifierloop:
						goto case TokenizerState.BOGUS_DOCTYPE;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BOGUS_DOCTYPE:
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '>':
									/*
									 * U+003E GREATER-THAN SIGN (>) Emit that
									 * DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Stay in the bogus DOCTYPE
									 * state.
									 */
									continue;
							}
						}
					// XXX reorder point
					case TokenizerState.DOCTYPE_YSTEM:
						/*doctypeystemloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Otherwise, if the six characters starting from the
							 * current input character are an ASCII case-insensitive
							 * match for the word "SYSTEM", then consume those
							 * characters and switch to the before DOCTYPE system
							 * identifier state.
							 */
							if (index < 5)
							{ // YSTEM.Length
								char folded = c;
								if (c >= 'A' && c <= 'Z')
								{
									folded += (char)0x20;
								}
								if (folded != YSTEM[index])
								{
									BogusDoctype();
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
								    state = TokenizerState.BOGUS_DOCTYPE;
									reconsume = true;
									goto continueStateloop;
								}
								index++;
								goto continueStateloop;
							}
							else
							{
								//state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
							    state = TokenizerState.AFTER_DOCTYPE_SYSTEM_KEYWORD;
								reconsume = true;
								goto breakDoctypeystemloop;
								// goto continueStateloop;
							}
						}
					breakDoctypeystemloop:
						goto case TokenizerState.AFTER_DOCTYPE_SYSTEM_KEYWORD;
					// FALLTHRU DON'T REORDER
					case TokenizerState.AFTER_DOCTYPE_SYSTEM_KEYWORD:
						/*afterdoctypesystemkeywordloop:*/
						for (; ; )
						{
							if (reconsume)
							{
								reconsume = false;
							}
							else
							{
								if (++pos == endPos)
								{
									goto breakStateloop;
								}
								c = buf[pos];
							}
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;

									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
									 * Switch to the before DOCTYPE system
									 * identifier state.
									 */
									//state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
									goto breakAfterdoctypesystemkeywordloop;
								// FALL THROUGH continue stateloop
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Parse Error.
									 */
									ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Parse Error.
									 */
									ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
									/*
									 * Set the DOCTYPE token's system identifier to
									 * the empty string (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto continueStateloop;
								case '>':
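									/*
									 * U+003E GREATER-THAN SIGN (>) Parse error.
									 * NOTE(review): the error is reported through
									 * ErrExpectedPublicId() even though this state follows
									 * the SYSTEM keyword; confirm that this is intended.
									 */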
									ErrExpectedPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakAfterdoctypesystemkeywordloop:
						goto case TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
					// FALLTHRU DON'T REORDER
					case TokenizerState.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
						/*beforedoctypesystemidentifierloop:*/
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\r':
									SilentCarriageReturn();
									goto breakStateloop;
								case '\n':
									SilentLineFeed();
									// fall thru
									goto case ' ';
								case ' ':
								case '\t':
								case '\u000C':
									/*
									 * U+0009 CHARACTER TABULATION U+000A LINE FEED
									 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
									 * in the before DOCTYPE system identifier
									 * state.
									 */
									continue;
								case '"':
									/*
									 * U+0022 QUOTATION MARK (") Set the DOCTYPE
									 * token's system identifier to the empty string
									 * (not missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
									goto continueStateloop;
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
									 * system identifier to the empty string (not
									 * missing),
									 */
									ClearLongStrBuf();
									/*
									 * then switch to the DOCTYPE system identifier
									 * (single-quoted) state.
									 */
									//state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
							        state = TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
									goto breakBeforedoctypesystemidentifierloop;
								// goto continueStateloop;
								case '>':
									/* U+003E GREATER-THAN SIGN (>) Parse error. */
									ErrExpectedSystemId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								default:
									BogusDoctype();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									// done by bogusDoctype();
									/*
									 * Switch to the bogus DOCTYPE state.
									 */
									//state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
							        state = TokenizerState.BOGUS_DOCTYPE;
									goto continueStateloop;
							}
						}
					breakBeforedoctypesystemidentifierloop:
						goto case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
					// FALLTHRU DON'T REORDER
					case TokenizerState.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the after
									 * DOCTYPE system identifier state.
									 */
									systemIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
									goto continueStateloop;
								case '>':
									ErrGtInSystemId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									systemIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * system identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE system identifier
									 * (double-quoted) state.
									 */
									continue;
							}
						}
					// XXX reorder point

					case TokenizerState.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
						for (; ; )
						{
							if (++pos == endPos)
							{
								goto breakStateloop;
							}
							c = buf[pos];
							/*
							 * Consume the next input character:
							 */
							switch (c)
							{
								case '\'':
									/*
									 * U+0027 APOSTROPHE (') Switch to the after
									 * DOCTYPE public identifier state.
									 */
									publicIdentifier = LongStrBufToString();
									//state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
							        state = TokenizerState.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
									goto continueStateloop;
								case '>':
									ErrGtInPublicId();
									/*
									 * Set the DOCTYPE token's force-quirks flag to
									 * on.
									 */
									forceQuirks = true;
									/*
									 * Emit that DOCTYPE token.
									 */
									publicIdentifier = LongStrBufToString();
									EmitDoctypeToken(pos);
									/*
									 * Switch to the data state.
									 */
									//state = Transition(state, Tokenizer.DATA, reconsume, pos);
							        state = TokenizerState.DATA;
									goto continueStateloop;
								case '\r':
									AppendLongStrBufCarriageReturn();
									goto breakStateloop;
								case '\n':
									AppendLongStrBufLineFeed();
									continue;
								case '\u0000':
									c = '\uFFFD';
									// fall thru
									goto default;
								default:
									/*
									 * Anything else Append the current input
									 * character to the current DOCTYPE token's
									 * public identifier.
									 */
									AppendLongStrBuf(c);
									/*
									 * Stay in the DOCTYPE public identifier
									 * (single-quoted) state.
									 */
									continue;
							}
						}
                        // XXX reorder point
                         case TokenizerState.PROCESSING_INSTRUCTION:
                            //processinginstructionloop: 
                            for (;;) {
                               if (++pos == endPos) {
                                   break;
                               }

                               c = buf[pos];
                               switch (c) {
                                   case '?':
                                       //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,reconsume, pos);
                                       state = TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK;
                                   
                                       break;
                                // continue stateloop;
                                default:
                                    continue;
                            }
                        }
                    //breakProcessingInstructionLoop:
                        break;


                case TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK:
                    if (++pos == endPos) {
                         goto breakStateloop;
                   }
                   c = buf[pos];
                   switch (c) {
                      case '>':
                           //state = Transition(state, Tokenizer.DATA,reconsume, pos);
                           state = TokenizerState.DATA;
                           continue;
                        default:
                           //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION,reconsume, pos);
                           state = TokenizerState.PROCESSING_INSTRUCTION;
                           continue;
                  }
					// END HOTSPOT WORKAROUND
				}
			} // stateloop


			breakStateloop:

			FlushChars(buf, pos);
			/*
			 * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
			 */
			// Save locals
			stateSave = state;
			returnStateSave = returnState;
			return pos;
		}

Example #35

Show file

File: IterationData.cs Project: mburgman101/CsQuery

        /// <summary>
        /// Builds a text node for the HTML found between HtmlStart and the current position (Pos).
        /// When WrapLiterals is set, the text node is placed inside a newly created span element
        /// and that span is what gets returned through <paramref name="literal"/>.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory supplying the source HTML (or the bound document and its index).
        /// </param>
        /// <param name="literal">
        /// [out] The node that was created, or null when there is no text between HtmlStart and Pos.
        /// </param>
        ///
        /// <returns>
        /// true only when a node was created and there is no Parent to attach it to (the tokenizer
        /// state is then set to Finished); false when nothing was created or when the node was
        /// appended to the parent's children.
        /// </returns>

        public bool TryGetLiteral(HtmlElementFactory factory, out IDomObject literal)
        {
            // No characters between the end of the last tag and the current position.
            if (Pos <= HtmlStart)
            {
                literal = null;
                return false;
            }

            // Pick the concrete text node type from the current insertion mode.
            var mode = InsertionMode;
            DomText textNode;
            if (mode == InsertionMode.Invalid)
            {
                textNode = new DomInvalidElement();
            }
            else if (mode == InsertionMode.Text)
            {
                // Text mode applies to a single literal only; fall back to the default mode.
                InsertionMode = InsertionMode.Default;
                textNode = new DomInnerText();
            }
            else
            {
                textNode = new DomText();
            }
            literal = textNode;

            if (!factory.IsBound)
            {
                // Unbound: copy the decoded text straight into the node.
                literal.NodeValue = HtmlData.HtmlDecode(factory.Html.SubstringBetween(HtmlStart, Pos));
            }
            else
            {
                // Bound: the node refers to the text through the document's index.
                textNode.SetTextIndex(
                    factory.Document,
                    factory.Document.DocumentIndex.TokenizeString(HtmlStart, Pos - HtmlStart));
            }

            if (WrapLiterals)
            {
                DomElement span = DomElement.Create("span");
                span.ChildNodesInternal.AddAlways(literal);
                literal = span;
            }

            if (Parent == null)
            {
                // Nothing to attach to: hand the node back to the caller and stop tokenizing.
                TokenizerState = TokenizerState.Finished;
                return true;
            }

            ((DomElement)Parent.Element).ChildNodesInternal.AddAlways(literal);
            Reset();
            return false;
        }

Example #36

Show file

File: Tokenizer.Emit.cs Project: prepare/WebParser

 /// <summary>
 /// Routes the first two characters of <paramref name="val"/> either to the token listener
 /// or to the long string buffer, depending on which bits of <paramref name="returnState"/>
 /// survive DATA_AND_RCDATA_MASK.
 /// </summary>
 /// <param name="val">Buffer holding at least two characters.</param>
 /// <param name="returnState">State the tokenizer will return to afterwards.</param>
 void EmitOrAppendTwo(char[] val, TokenizerState returnState)
 {
     // NOTE(review): an earlier revision tested "!= 0" for the append branch; whether "== 0"
     // is right depends on the enum numbering and the mask value, neither of which is
     // visible here - confirm against the TokenizerState declaration.
     bool anyMaskedBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;
     if (anyMaskedBitSet)
     {
         // Masked bits present: emit the two characters directly.
         TokenListener.Characters(val, 0, 2);
         return;
     }

     // No masked bits: accumulate both characters in the long string buffer.
     AppendLongStrBuf(val[0]);
     AppendLongStrBuf(val[1]);
 }

Example #37

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		/// <summary>
		/// Emits the tag currently being built to the token handler as a start or end tag,
		/// clears the per-tag fields and reports the state to continue in.
		/// </summary>
		/// <param name="selfClosing">Whether the tag was written in self-closing form.</param>
		/// <param name="pos">Buffer position of the tag's last character; character data resumes at pos + 1.</param>
		/// <returns>The value of stateSave after the token handler has run.</returns>
		private TokenizerState EmitCurrentTagToken(bool selfClosing, int pos)
		{
			cstart = pos + 1;
			MaybeErrSlashInEndTag(selfClosing);

			// Default continuation state; must be assigned before the handler is invoked
			// because the handler is allowed to overwrite it.
			stateSave = TokenizerState.DATA;
			HtmlAttributes tagAttributes = attributes ?? HtmlAttributes.EMPTY_ATTRIBUTES;

			if (!endTag)
			{
				TokenHandler.StartTag(tagName, tagAttributes, selfClosing);
			}
			else
			{
				/*
				 * When an end tag token is emitted, the content model flag must be
				 * switched to the PCDATA state.
				 */
				MaybeErrAttributesOnEndTag(tagAttributes);
				TokenHandler.EndTag(tagName);
			}

			// The tag has been handed over; drop the per-tag state.
			tagName = null;
			ResetAttributes();

			/*
			 * The token handler may have called SetStateAndEndTagExpectation and
			 * changed stateSave since the start of this method, so return the
			 * field rather than a constant.
			 */
			return stateSave;
		}

Example #38

Show file

 /// <summary>
 /// Creates an <see cref="UnclosedBlockCommentException"/> by forwarding both arguments
 /// unchanged to the base exception constructor.
 /// </summary>
 /// <param name="tokenizerState">The tokenizer state passed on to the base class.</param>
 /// <param name="batchSource">The batch source passed on to the base class.</param>
 internal UnclosedBlockCommentException(TokenizerState tokenizerState, IBatchSource batchSource)
     : base(tokenizerState, batchSource)
 {
     // No state of its own; the base constructor receives everything.
 }

Example #39

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		// ]NOCPP]

		// For the token handler to call
		/**
		 * Stores the tokenizer state to resume in and, unless that state is DATA,
		 * the element name whose end tag switches the tokenizer back to normal.
		 * This should only ever be used to put the tokenizer into one of the
		 * states that have a special end tag expectation.
		 * 
		 * @param specialTokenizerState
		 *            the tokenizer state to set
		 * @param endTagExpectation
		 *            the expected end tag for transitioning back to normal;
		 *            not read when the state is DATA
		 */
		public void SetStateAndEndTagExpectation(TokenizerState specialTokenizerState,
				[Local] String endTagExpectation)
		{
			stateSave = specialTokenizerState;

			// DATA carries no end tag expectation, so the name is only resolved
			// for the other states.
			if (specialTokenizerState != TokenizerState.DATA)
			{
				char[] expectedName = endTagExpectation.ToCharArray();
				this.endTagExpectation = ElementName.ElementNameByBuffer(expectedName, 0, expectedName.Length);
				EndTagExpectationToArray();
			}
		}

Example #40

Show file

            /// <summary>
            /// Splits <c>original</c> into tokens separated by whitespace. A comma is always
            /// returned as a token of its own. Text inside single quotes, and text inside
            /// (nested) round or square brackets, is kept together in one token even when it
            /// contains whitespace or commas.
            /// </summary>
            /// <remarks>
            /// The start of each token is recorded when its first character is seen, instead
            /// of being derived from the previous token's length plus an assumed single
            /// separator character. That assumption returned the wrong substring (or ran past
            /// the end of the string) for leading whitespace, runs of whitespace, a comma met
            /// while skipping whitespace, or a token directly following ')' or ','.
            /// A quoted token now also includes its closing quote, in the same way a
            /// parenthesised token includes its closing bracket.
            /// </remarks>
            /// <returns>An enumerator over the tokens, in source order.</returns>
            IEnumerator <SqlString> IEnumerable <SqlString> .GetEnumerator()
            {
                TokenizerState state            = TokenizerState.WhiteSpace;
                int            parenthesisCount = 0;
                bool           escapeQuote      = false;
                int            tokenStart       = 0;
                int            tokenLength      = 0;
                string         originalString   = original.ToString();

                for (int i = 0; i < originalString.Length; i++)
                {
                    char ch = originalString[i];
                    switch (state)
                    {
                    case TokenizerState.WhiteSpace:
                        if (ch == ',')
                        {
                            // A comma between tokens is a token of its own.
                            yield return new SqlString(",");
                        }
                        else if (!char.IsWhiteSpace(ch))
                        {
                            // First character of a new token: remember where it begins.
                            tokenStart  = i;
                            tokenLength = 1;

                            if (ch == '\'')
                            {
                                state = TokenizerState.Quoted;
                            }
                            else if (ch == '(' || ch == '[')
                            {
                                state            = TokenizerState.InParenthesis;
                                parenthesisCount = 1;
                            }
                            else
                            {
                                state = TokenizerState.Token;
                            }
                        }
                        break;

                    case TokenizerState.Quoted:
                        // Every character met inside quotes belongs to the token,
                        // including the closing quote.
                        tokenLength += 1;

                        if (escapeQuote)
                        {
                            // This character was escaped by the previous one.
                            escapeQuote = false;
                        }
                        // handle escaping of ' by using '' or \'
                        else if (ch == '\\' || (ch == '\'' && i + 1 < originalString.Length && originalString[i + 1] == '\''))
                        {
                            escapeQuote = true;
                        }
                        else if (ch == '\'')
                        {
                            yield return original.Substring(tokenStart, tokenLength);

                            tokenLength = 0;
                            state       = TokenizerState.WhiteSpace;
                        }
                        break;

                    case TokenizerState.InParenthesis:
                        tokenLength += 1;

                        if (ch == '(' || ch == '[')
                        {
                            parenthesisCount += 1;
                        }
                        else if (ch == ')' || ch == ']')
                        {
                            parenthesisCount -= 1;
                            if (parenthesisCount == 0)
                            {
                                // Outermost bracket closed: the token is complete.
                                yield return original.Substring(tokenStart, tokenLength);

                                tokenLength = 0;
                                state       = TokenizerState.WhiteSpace;
                            }
                        }
                        break;

                    case TokenizerState.Token:
                        if (char.IsWhiteSpace(ch))
                        {
                            yield return original.Substring(tokenStart, tokenLength);

                            tokenLength = 0;
                            state       = TokenizerState.WhiteSpace;
                        }
                        else if (ch == ',')
                        {
                            // stop current token, and send the , as well
                            yield return original.Substring(tokenStart, tokenLength);

                            yield return new SqlString(",");

                            tokenLength = 0;
                            state       = TokenizerState.WhiteSpace;
                        }
                        else if (ch == '(' || ch == '[')
                        {
                            // The bracketed part continues the current token.
                            state            = TokenizerState.InParenthesis;
                            parenthesisCount = 1;
                            tokenLength     += 1;
                        }
                        else if (ch == '\'')
                        {
                            // The quoted part continues the current token.
                            state        = TokenizerState.Quoted;
                            tokenLength += 1;
                        }
                        else
                        {
                            tokenLength += 1;
                        }
                        break;

                    default:
                        throw new InvalidExpressionException("Could not understand the string " + original);
                    }
                }

                // Whatever is still pending at end of input (including an unterminated quote
                // or bracket) is returned as the last token.
                if (tokenLength > 0)
                {
                    yield return original.Substring(tokenStart, tokenLength);
                }
            }

Example #41

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		/**
		 * Stores the tokenizer state to resume in together with the element
		 * whose end tag switches the tokenizer back to normal, then refreshes
		 * the array form of that expectation. This should only ever be used to
		 * put the tokenizer into one of the states that have a special end tag
		 * expectation.
		 * 
		 * @param specialTokenizerState
		 *            the tokenizer state to set
		 * @param endTagExpectation
		 *            the expected end tag for transitioning back to normal
		 */
		public void SetStateAndEndTagExpectation(TokenizerState specialTokenizerState,
				ElementName endTagExpectation)
		{
			// The parameter shadows the field, hence the explicit "this".
			this.endTagExpectation = endTagExpectation;
			stateSave = specialTokenizerState;

			// Must run after the field above has been assigned.
			EndTagExpectationToArray();
		}

Example #42

Show file

File: Tokenizer.StateLoop3_RawText_CData_RcRef.cs Project: prepare/WebParser

        void StateLoop3_RawText_CData_RcRef(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were and LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {
                 
                //*************
            continueStateloop:
                //*************

                switch (state)
                {   
                    // XXX reorder point
                    case TokenizerState.CDATA_START:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                if (index < 6)
                                { // CDATA_LSQB.Length
                                    if (c == Tokenizer.CDATA_LSQB[index])
                                    {
                                        AppendLongStrBuf(c);
                                    }
                                    else
                                    {
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    reader.StartCollect(); // start coalescing
                                    //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                    state = TokenizerState.s68_CDATA_SECTION;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto case TokenizerState.s68_CDATA_SECTION;
                                    //break; // FALL THROUGH goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                            //------------------------------------

                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s68_CDATA_SECTION:
                        /*cdatasectionloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB;
                                        goto breakCdatasectionloop; // FALL THROUGH
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        continue;
                                }
                            }
                            goto breakStateloop;
                        //------------------------------------
                        breakCdatasectionloop:
                            goto case TokenizerState.CDATA_RSQB;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.CDATA_RSQB:
                        /*cdatarsqb:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        //state = Transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB_RSQB;
                                        goto breakCdatarsqb;
                                    default:
                                        TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 1);
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                        state = TokenizerState.s68_CDATA_SECTION;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------ 
                        breakCdatarsqb:
                            goto case TokenizerState.CDATA_RSQB_RSQB;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.CDATA_RSQB_RSQB:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;
                            }
                            switch (c)
                            {
                                case '>':
                                    //cstart = pos + 1;
                                    reader.SkipOneAndStartCollect();
                                    //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                default:
                                    TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 2);
                                    reader.StartCollect();
                                    //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                    state = TokenizerState.s68_CDATA_SECTION;
                                    reader.StepBack();
                                    //reconsume = true;
                                    goto continueStateloop;

                            }
                        } 
                    // XXX reorder point
                    case TokenizerState.s07_PLAINTEXT:
                        /*plaintextloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\u0000':
                                        EmitPlaintextReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the current input
                                         * character as a character token. Stay in the
                                         * PLAINTEXT state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        } 
                    // XXX reorder point
                    case TokenizerState.s03_RCDATA:
                        /*rcdataloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in RCDATA state.
                                         */
                                        //FlushChars(buf, pos);
                                        FlushChars();
                                        ClearStrBufAndAppend(c);
                                        additional = '\u0000';
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
                                        goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * RCDATA less-than sign state.
                                         */
                                        //FlushChars(buf, pos);
                                        FlushChars();
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN;
                                        goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Emit the current input character as a
                                         * character token. Stay in the RCDATA state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s05_RAWTEXT:
                        /*rawtextloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the
                                         * RAWTEXT less-than sign state.
                                         */
                                        FlushChars();

                                        returnState = state;
                                        //state = Transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
                                        state = TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN;
                                        goto breakRawtextloop;
                                    // FALL THRU goto continueStateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Emit the current input character as a
                                         * character token. Stay in the RAWTEXT state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakRawtextloop:
                            goto case TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN;
                        }
                    // XXX fallthru don't reorder
                    case TokenizerState.s11_RAWTEXT_RCDATA_LESS_THAN_SIGN:
                        /*rawtextrcdatalessthansignloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Set the temporary buffer
                                         * to the empty string. Switch to the script
                                         * data end tag open state.
                                         */
                                        index = 0;
                                        ClearStrBuf();
                                        //state = Transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
                                        state = TokenizerState.NON_DATA_END_TAG_NAME;
                                        goto breakRawtextrcdatalessthansignloop;
                                    // FALL THRU goto continueStateloop;
                                    default:
                                        /*
                                         * Otherwise, emit a U+003C LESS-THAN SIGN
                                         * character token
                                         */
                                        TokenListener.Characters(Tokenizer.LT_GT, 0, 1);
                                        /*
                                         * and reconsume the current input character in
                                         * the data state.
                                         */
                                        reader.StartCollect();
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakRawtextrcdatalessthansignloop:
                            goto case TokenizerState.NON_DATA_END_TAG_NAME;
                        }
                    // XXX fall thru. don't reorder.
                    case TokenizerState.NON_DATA_END_TAG_NAME:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * ASSERT! when entering this state, set index to 0 and
                                 * call clearStrBuf() assert (contentModelElement !=
                                 * null); Let's implement the above without lookahead.
                                 * strBuf is the 'temporary buffer'.
                                 */
                                if (index < endTagExpectationAsArray.Length)
                                {
                                    char e = endTagExpectationAsArray[index];
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded != e)
                                    {

                                        ErrHtml4LtSlashInRcdata(folded);
                                        TokenListener.Characters(Tokenizer.LT_SOLIDUS,
                                                0, 2);
                                        EmitStrBuf();
                                        reader.StartCollect();
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    AppendStrBuf(c);
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    endTag = true;
                                    // XXX replace contentModelElement with different
                                    // type
                                    tagName = endTagExpectation;
                                    switch (c)
                                    {
                                        case '\r':
                                            SilentCarriageReturn();
                                            //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                            state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;

                                            goto breakStateloop;
                                        case '\n':
                                        case ' ':
                                        case '\t':
                                        case '\u000C':
                                            /*
                                             * U+0009 CHARACTER TABULATION U+000A LINE
                                             * FEED (LF) U+000C FORM FEED (FF) U+0020
                                             * SPACE If the current end tag token is an
                                             * appropriate end tag token, then switch to
                                             * the before attribute name state.
                                             */
                                            //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                            state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                            goto continueStateloop;
                                        case '/':
                                            /*
                                             * U+002F SOLIDUS (/) If the current end tag
                                             * token is an appropriate end tag token,
                                             * then switch to the self-closing start tag
                                             * state.
                                             */
                                            //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                            state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                            goto continueStateloop;
                                        case '>':
                                            /*
                                             * U+003E GREATER-THAN SIGN (>) If the
                                             * current end tag token is an appropriate
                                             * end tag token, then emit the current tag
                                             * token and switch to the data state.
                                             */
                                            //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                            state = EmitCurrentTagToken(false);
                                            if (shouldSuspend)
                                            {
                                                goto breakStateloop;
                                            }
                                            goto continueStateloop;
                                        default:
                                            /*
                                             * Emit a U+003C LESS-THAN SIGN character
                                             * token, a U+002F SOLIDUS character token,
                                             * a character token for each of the
                                             * characters in the temporary buffer (in
                                             * the order they were added to the buffer),
                                             * and reconsume the current input character
                                             * in the RAWTEXT state.
                                             */
                                            // [NOCPP[
                                            ErrWarnLtSlashInRcdata();
                                            // ]NOCPP]
                                            TokenListener.Characters(LT_SOLIDUS, 0, 2);
                                            EmitStrBuf();
                                            if (c == '\u0000')
                                            {
                                                EmitReplacementCharacter();
                                            }
                                            else
                                            {
                                                reader.StartCollect(); // don't drop the
                                                // character
                                            }
                                            //state = Transition(state, returnState, reconsume, pos);
                                            state = returnState;
                                            goto continueStateloop;
                                    }
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        } 
                    case TokenizerState.PROCESSING_INSTRUCTION:
                        //processinginstructionloop: 
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '?':
                                        //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,reconsume, pos);
                                        state = TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK;

                                        break;
                                    // continue stateloop;
                                    default:
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    //breakProcessingInstructionLoop: 
                    case TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;

                            }

                            switch (c)
                            {
                                case '>':
                                    //state = Transition(state, Tokenizer.DATA,reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    continue;
                                default:
                                    //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION,reconsume, pos);
                                    state = TokenizerState.PROCESSING_INSTRUCTION;
                                    continue;
                            }

                        }
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

       breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }

Example #43

Show file

File: Tokenizer.cs Project: antrampa/HtmlParserSharp

		/// <summary>
		/// Puts the saved tokenizer state back to <c>TokenizerState.DATA</c> and clears the
		/// per-token scratch fields (buffer lengths, flags, counters, and the current
		/// tag/attribute references). <c>shouldSuspend</c> is intentionally left untouched.
		/// </summary>
		public void ResetToDataState()
		{
			// Saved state goes back to DATA.
			stateSave = TokenizerState.DATA;

			// Counters, lengths and indices that restart from zero.
			strBufLen = 0;
			longStrBufLen = 0;
			strBufMark = 0;
			index = 0;
			value = 0;
			lo = 0;
			hi = 0; // will always be overwritten before use anyway

			// Fields that restart from the -1 sentinel.
			entCol = -1;
			firstCharKey = -1;
			candidate = -1;
			prevValue = -1;

			additional = '\u0000';

			// Boolean flags.
			// line = 1; XXX line numbers
			lastCR = false;
			forceQuirks = false;
			seenDigits = false;
			endTag = false;
			// shouldSuspend is deliberately NOT reset here (reset removed by
			// J. Treworgy, 12/7/2012): it should remain true so the parser can
			// choose to abort.

			InitDoctypeFields();

			// Drop references to the current tag and attribute. Assigning null
			// unconditionally is equivalent to the former "if not null, set null".
			tagName = null;
			attributeName = null;
			// [NOCPP[
			if (newAttributesEachTime)
			{
				// ]NOCPP]
				attributes = null;
				// [NOCPP[
			}
			// ]NOCPP]
		}

Example #44

Show file

File: Tokenizer.StateLoop3_DocType.cs Project: prepare/WebParser

        void StateLoop3_DocType(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were an LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {
                  
                    // XXX reorder point
                    case TokenizerState.MARKUP_DECLARATION_OCTYPE:
                        /*markupdeclarationdoctypeloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                if (index < 6)
                                { // OCTYPE.Length
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded == Tokenizer.OCTYPE[index])
                                    {
                                        AppendLongStrBuf(c);
                                    }
                                    else
                                    {
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    // state = Transition(state, Tokenizer.DOCTYPE, reconsume, pos);
                                    state = TokenizerState.s52_DOCTYPE;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto breakMarkupdeclarationdoctypeloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakMarkupdeclarationdoctypeloop:
                            goto case TokenizerState.s52_DOCTYPE;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s52_DOCTYPE:
                        /*doctypeloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                InitDoctypeFields();
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before DOCTYPE name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                                        goto breakDoctypeloop;
                                    // goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Parse error.
                                         */
                                        ErrMissingSpaceBeforeDoctypeName();
                                        /*
                                         * Reconsume the current character in the before
                                         * DOCTYPE name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto breakDoctypeloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypeloop:
                            goto case TokenizerState.s53_BEFORE_DOCTYPE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s53_BEFORE_DOCTYPE_NAME:
                        /*beforedoctypenameloop:*/
                        /*
                         * Before DOCTYPE name state: skips whitespace, then either
                         * emits a force-quirks DOCTYPE token on '>' or seeds the name
                         * buffer with the first name character and moves on to the
                         * DOCTYPE name state.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // CR goes through SilentCarriageReturn() and then leaves the
                                        // state loop without changing `state`.
                                        // NOTE(review): presumably the tokenizer resumes in this same
                                        // state on re-entry; breakStateloop is outside this excerpt - confirm.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                                         * U+000C FORM FEED (FF), U+0020 SPACE:
                                         * stay in the before DOCTYPE name state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>): parse error.
                                         */
                                        ErrNamelessDoctype();
                                        /*
                                         * Create a new DOCTYPE token. Set its
                                         * force-quirks flag to on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit the token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        // U+0000 NULL is replaced by U+FFFD REPLACEMENT CHARACTER and
                                        // then handled like any other name character.
                                        c = '\uFFFD';
                                        // C# has no implicit fall-through, so jump to the default label.
                                        goto default;
                                    default:
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z: create a
                                             * new DOCTYPE token. Set the token's name
                                             * to the lowercase version of the input
                                             * character (add 0x0020 to the character's
                                             * code point).
                                             */
                                            c += (char)0x20;
                                        }
                                        /* Anything else: create a new DOCTYPE token. */
                                        /*
                                         * Set the token's name to the current
                                         * input character.
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * Switch to the DOCTYPE name state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s54_DOCTYPE_NAME;
                                        // Leave the read loop through the local label below, which jumps
                                        // straight to the DOCTYPE name case instead of re-entering the
                                        // state loop.
                                        goto breakBeforedoctypenameloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: reader.ReadNext returned false; leave the state loop with
                            // `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        breakBeforedoctypenameloop:
                            // Explicit replacement for fall-through into the next case.
                            goto case TokenizerState.s54_DOCTYPE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s54_DOCTYPE_NAME:
                        /*doctypenameloop:*/
                        /*
                         * DOCTYPE name state: appends (lower-cased) characters to the
                         * name buffer until whitespace or '>' ends the name; at that
                         * point StrBufToDoctypeName() is called before the transition.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // CR ends the name like other whitespace, but goes through
                                        // SilentCarriageReturn() and leaves the state loop after
                                        // recording the next state.
                                        SilentCarriageReturn();
                                        StrBufToDoctypeName();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s55_AFTER_DOCTYPE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                                         * U+000C FORM FEED (FF), U+0020 SPACE:
                                         * switch to the after DOCTYPE name state.
                                         */
                                        StrBufToDoctypeName();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
                                        state = TokenizerState.s55_AFTER_DOCTYPE_NAME;
                                        // Jump to the next case directly via the local label below.
                                        goto breakDoctypenameloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>): emit the current
                                         * DOCTYPE token.
                                         */
                                        StrBufToDoctypeName();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\u0000':
                                        // U+0000 NULL is replaced by U+FFFD REPLACEMENT CHARACTER and
                                        // then appended like any other character.
                                        c = '\uFFFD';
                                        // C# has no implicit fall-through, so jump to the default label.
                                        goto default;
                                    default:
                                        /*
                                         * U+0041 LATIN CAPITAL LETTER A through to
                                         * U+005A LATIN CAPITAL LETTER Z: append the
                                         * lowercase version of the input character (add
                                         * 0x0020 to the character's code point) to the
                                         * current DOCTYPE token's name.
                                         */
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            c += (char)0x0020;
                                        }
                                        /*
                                         * Anything else: append the current input
                                         * character to the current DOCTYPE token's
                                         * name.
                                         */
                                        AppendStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE name state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            // eof: reader.ReadNext returned false; leave the state loop with
                            // `state` unchanged (the name buffer is not converted here).
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypenameloop:
                            // Explicit replacement for fall-through into the next case.
                            goto case TokenizerState.s55_AFTER_DOCTYPE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s55_AFTER_DOCTYPE_NAME:
                        /*afterdoctypenameloop:*/
                        /*
                         * After DOCTYPE name state: skips whitespace, emits the token
                         * on '>', starts keyword matching on 'p'/'P' or 's'/'S' (with
                         * `index` reset to 0), and treats anything else as a bogus
                         * DOCTYPE.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // CR goes through SilentCarriageReturn() and leaves the state
                                        // loop; `state` is not changed here.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                                         * U+000C FORM FEED (FF), U+0020 SPACE:
                                         * stay in the after DOCTYPE name state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>): emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case 'p':
                                    case 'P':
                                        // Possible start of the PUBLIC keyword: the leading letter is
                                        // consumed here and the DOCTYPE_UBLIC case matches the
                                        // remaining letters, counting from index 0.
                                        index = 0;
                                        //state = Transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
                                        state = TokenizerState.DOCTYPE_UBLIC;

                                        // Jump to the next case directly via the local label below.
                                        goto breakAfterdoctypenameloop;
                                    // goto continueStateloop;
                                    case 's':
                                    case 'S':
                                        // Possible start of the SYSTEM keyword: same scheme, handled by
                                        // the DOCTYPE_YSTEM case (not adjacent, so go back through the
                                        // state loop).
                                        index = 0;
                                        //state = Transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
                                        state = TokenizerState.DOCTYPE_YSTEM;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Otherwise, this is a parse error.
                                         */
                                        BogusDoctype();

                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by BogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;

                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: reader.ReadNext returned false; leave the state loop with
                            // `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        breakAfterdoctypenameloop:
                            // Explicit replacement for fall-through into the next case.
                            goto case TokenizerState.DOCTYPE_UBLIC;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.DOCTYPE_UBLIC:
                        /*doctypeublicloop:*/
                        {
                            /*
                             * Keyword matcher for the tail of "PUBLIC". The state that saw the
                             * leading 'p'/'P' reset `index` to 0; each character read here is
                             * compared, ASCII case-insensitively, with Tokenizer.UBLIC[index].
                             *
                             * If the six characters starting from the leading letter are an
                             * ASCII case-insensitive match for the word "PUBLIC", those
                             * characters are consumed and the tokenizer moves on to the after
                             * DOCTYPE public keyword state. Any mismatch makes this a bogus
                             * DOCTYPE.
                             */
                            char current;
                            while (reader.ReadNext(out current))
                            {
                                // 5 is the number of letters to match (the original note reads
                                // "UBLIC.Length").
                                if (index >= 5)
                                {
                                    // Whole keyword matched. The character just read belongs to
                                    // the next state, so record that state and un-read it.
                                    //state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
                                    state = TokenizerState.s56_AFTER_DOCTYPE_PUBLIC_KEYWORD;
                                    reader.StepBack();
                                    goto breakDoctypeublicloop;
                                }

                                // Fold ASCII upper-case letters to lower case before comparing.
                                char lowered = current;
                                if ('A' <= current && current <= 'Z')
                                {
                                    lowered = (char)(current + 0x20);
                                }

                                if (lowered == Tokenizer.UBLIC[index])
                                {
                                    // Letter matched: advance and keep reading.
                                    index++;
                                    continue;
                                }

                                // Mismatch: report a bogus DOCTYPE (which also handles the
                                // force-quirks flag, per the original note), switch to the bogus
                                // DOCTYPE state, and un-read the offending character so that
                                // state sees it.
                                BogusDoctype();
                                //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                state = TokenizerState.s67_BOGUS_DOCTYPE;
                                reader.StepBack();
                                goto continueStateloop;
                            }
                            //------------------------------------
                            // eof: reader.ReadNext returned false; leave the state loop with
                            // `state` and `index` as they are.
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypeublicloop:
                            // Explicit replacement for fall-through into the next case.
                            goto case TokenizerState.s56_AFTER_DOCTYPE_PUBLIC_KEYWORD;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s56_AFTER_DOCTYPE_PUBLIC_KEYWORD:
                        /*afterdoctypepublickeywordloop:*/
                        /*
                         * After DOCTYPE public keyword state: decides what follows the
                         * PUBLIC keyword. Whitespace moves on to the before DOCTYPE
                         * public identifier state; a quote directly after the keyword
                         * is reported as an error but still starts the identifier; '>'
                         * emits a force-quirks token; anything else is a bogus DOCTYPE.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        // CR counts as whitespace here: record the next state, then
                                        // leave the state loop after SilentCarriageReturn().
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                                         * U+000C FORM FEED (FF), U+0020 SPACE:
                                         * switch to the before DOCTYPE public
                                         * identifier state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
                                        // Jump to the next case directly via the local label below.
                                        goto breakAfterdoctypepublickeywordloop;
                                    // (instead of going back through continueStateloop)
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK ("): parse error.
                                         */
                                        ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's public identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE ('): parse error.
                                         */
                                        ErrNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's public identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s59_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    case '>':
                                        /* U+003E GREATER-THAN SIGN (>): parse error. */
                                        ErrExpectedPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        // Anything else: parse error, reported through BogusDoctype().
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by BogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: reader.ReadNext returned false; leave the state loop with
                            // `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        breakAfterdoctypepublickeywordloop:
                            // Explicit replacement for fall-through into the next case.
                            goto case TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s57_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
                        /*beforedoctypepublicidentifierloop:*/
                        /*
                         * Before DOCTYPE public identifier state: skips whitespace and
                         * waits for the opening quote of the public identifier. '>'
                         * emits a force-quirks token; anything else is a bogus DOCTYPE.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\r':
                                        // CR goes through SilentCarriageReturn() and leaves the state
                                        // loop; `state` is not changed here.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                                         * U+000C FORM FEED (FF), U+0020 SPACE:
                                         * stay in the before DOCTYPE public identifier
                                         * state.
                                         */
                                        continue;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK ("): set the DOCTYPE
                                         * token's public identifier to the empty string
                                         * (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                                        // The double-quoted case is the next one, so jump to it directly
                                        // via the local label below.
                                        goto breakBeforedoctypepublicidentifierloop;
                                    // goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE ('): set the DOCTYPE token's
                                         * public identifier to the empty string (not
                                         * missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE public identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s59_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                                        // The single-quoted case is not adjacent, so go back through the
                                        // state loop.
                                        goto continueStateloop;
                                    case '>':
                                        /* U+003E GREATER-THAN SIGN (>): parse error. */
                                        ErrExpectedPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        // Anything else: parse error, reported through BogusDoctype().
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by BogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: reader.ReadNext returned false; leave the state loop with
                            // `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        breakBeforedoctypepublicidentifierloop:
                            // Explicit replacement for fall-through into the next case.
                            goto case TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s58_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
                        /*doctypepublicidentifierdoublequotedloop:*/
                        /*
                         * DOCTYPE public identifier (double-quoted) state: accumulates
                         * characters in the long string buffer until the closing '"',
                         * then stores the buffer in `publicIdentifier`. A '>' inside the
                         * identifier ends the DOCTYPE early with force-quirks on.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK ("): switch to the after
                                         * DOCTYPE public identifier state.
                                         */
                                        publicIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                                        // Jump to the next case directly via the local label below.
                                        goto breakDoctypepublicidentifierdoublequotedloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>): parse error.
                                         */
                                        ErrGtInPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        // The text collected so far is kept as the public identifier.
                                        publicIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        // CR inside the identifier is handed to a dedicated helper and
                                        // then the state loop is left with `state` unchanged.
                                        // NOTE(review): presumably the helper normalizes the line break
                                        // before appending - confirm in AppendLongStrBufCarriageReturn.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        // LF inside the identifier also goes through its own helper;
                                        // stay in this state.
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // U+0000 NULL is replaced by U+FFFD REPLACEMENT CHARACTER and
                                        // then appended like any other character.
                                        c = '\uFFFD';
                                        // C# has no implicit fall-through, so jump to the default label.
                                        goto default;
                                    default:
                                        /*
                                         * Anything else: append the current input
                                         * character to the current DOCTYPE token's
                                         * public identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE public identifier
                                         * (double-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            // eof: reader.ReadNext returned false; leave the state loop with
                            // `state` unchanged (publicIdentifier is not assigned here).
                            goto breakStateloop;
                        //------------------------------------
                        breakDoctypepublicidentifierdoublequotedloop:
                            // Explicit replacement for fall-through into the next case.
                            goto case TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
                        /*afterdoctypepublicidentifierloop:*/
                        // After DOCTYPE public identifier state: reads characters one at a
                        // time and selects the next tokenizer state from the character read.
                        // continueStateloop / breakStateloop are labels declared outside
                        // this case section.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // Same target state as the other whitespace characters
                                        // below, but this path leaves through breakStateloop
                                        // instead of jumping straight into the next case.
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
                                        state = TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the between DOCTYPE public and
                                         * system identifiers state.
                                         */
                                        //state = Transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
                                        state = TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;

                                        // Jump to the label after the loop, which enters the
                                        // s61 case directly.
                                        goto breakAfterdoctypepublicidentifierloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Parse error.
                                         */
                                        ErrNoSpaceBetweenPublicAndSystemIds();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Parse error.
                                         */
                                        ErrNoSpaceBetweenPublicAndSystemIds();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    default:
                                        // Any other character: report via BogusDoctype() and
                                        // continue in the bogus DOCTYPE state.
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Reached only from the whitespace branch above.
                        breakAfterdoctypepublicidentifierloop:
                            goto case TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s61_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
                        /*betweendoctypepublicandsystemidentifiersloop:*/
                        // Between DOCTYPE public and system identifiers state: skips
                        // whitespace, then either emits the DOCTYPE token, starts a quoted
                        // system identifier, or switches to the bogus DOCTYPE state.
                        // continueStateloop / breakStateloop are labels declared outside
                        // this case section.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // State is left unchanged; only the state loop is exited.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the between DOCTYPE public and system
                                         * identifiers state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                         * token's system identifier to the empty string
                                         * (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        // Jump to the label after the loop, which enters the
                                        // s64 case directly.
                                        goto breakBetweendoctypepublicandsystemidentifiersloop;
                                    // goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                         * system identifier to the empty string (not
                                         * missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    default:
                                        // Any other character: report via BogusDoctype() and
                                        // continue in the bogus DOCTYPE state.
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Reached only from the '"' branch above.
                        breakBetweendoctypepublicandsystemidentifiersloop:
                            goto case TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
                        /*doctypesystemidentifierdoublequotedloop:*/
                        // DOCTYPE system identifier (double-quoted) state: appends characters
                        // to the long string buffer until a closing '"' or a premature '>'
                        // is read. continueStateloop / breakStateloop are labels declared
                        // outside this case section.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Switch to the after
                                         * DOCTYPE system identifier state.
                                         */
                                        // The buffer content becomes the system identifier.
                                        systemIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s66_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrGtInSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        // Whatever was collected so far is kept as the
                                        // system identifier before the token is emitted.
                                        systemIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        // CR is appended through a dedicated helper, then the
                                        // state loop is exited with the state unchanged.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // NUL is replaced with U+FFFD before being appended.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * system identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        }
                    // next 2 lines were unreachable; commented out
                    //breakDoctypesystemidentifierdoublequotedloop:
                    //	goto case TokenizerState.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s66_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
                        /*afterdoctypesystemidentifierloop:*/
                        // After DOCTYPE system identifier state: skips whitespace, emits the
                        // DOCTYPE token on '>', and sends anything else to the bogus DOCTYPE
                        // state. continueStateloop / breakStateloop are labels declared
                        // outside this case section.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // State is left unchanged; only the state loop is exited.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the after DOCTYPE system identifier state.
                                         */
                                        continue;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Switch to the bogus DOCTYPE state. (This does
                                         * not set the DOCTYPE token's force-quirks flag
                                         * to on.)
                                         */
                                        BogusDoctypeWithoutQuirks();
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        // Jump to the label after the loop, which enters the
                                        // s67 case directly.
                                        goto breakAfterdoctypesystemidentifierloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Reached only from the default branch above.
                        breakAfterdoctypesystemidentifierloop:
                            goto case TokenizerState.s67_BOGUS_DOCTYPE;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s67_BOGUS_DOCTYPE:
                        // Bogus DOCTYPE state: discards characters until '>' is read, then
                        // emits the DOCTYPE token and returns to the data state.
                        // continueStateloop / breakStateloop are labels declared outside
                        // this case section.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit that
                                         * DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        // State is left unchanged; only the state loop is exited.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    // '\n' shares the default branch: it is skipped like any
                                    // other character.
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Stay in the bogus DOCTYPE
                                         * state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.DOCTYPE_YSTEM:
                        /*doctypeystemloop:*/
                        // Matches the remaining letters of the "SYSTEM" keyword against the
                        // YSTEM table, one character per pass, ASCII case-insensitively.
                        // `index` and `YSTEM` are declared outside this case section;
                        // presumably `index` was reset when the leading 'S' was matched —
                        // TODO confirm against the state that enters this one.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Otherwise, if the six characters starting from the
                                 * current input character are an ASCII case-insensitive
                                 * match for the word "SYSTEM", then consume those
                                 * characters and switch to the before DOCTYPE system
                                 * identifier state.
                                 */
                                if (index < 5)
                                { // YSTEM.Length
                                    // Fold ASCII upper-case letters to lower case before
                                    // comparing with the table entry.
                                    char folded = c;
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        folded += (char)0x20;
                                    }
                                    if (folded != YSTEM[index])
                                    {
                                        // Mismatch: switch to the bogus DOCTYPE state and
                                        // step the reader back so that state sees this
                                        // character again.
                                        BogusDoctype();
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        reader.StepBack();
                                        //reconsume = true;
                                        goto continueStateloop;
                                    }
                                    // Matched one more letter; re-enter through the state
                                    // loop with the state unchanged.
                                    index++;
                                    goto continueStateloop;
                                }
                                else
                                {
                                    // All five letters matched: the character just read
                                    // belongs to the next state, so step back before
                                    // switching to it.
                                    //state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
                                    state = TokenizerState.s62_AFTER_DOCTYPE_SYSTEM_KEYWORD;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto breakDoctypeystemloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Reached only from the else branch above.
                        breakDoctypeystemloop:
                            goto case TokenizerState.s62_AFTER_DOCTYPE_SYSTEM_KEYWORD;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s62_AFTER_DOCTYPE_SYSTEM_KEYWORD:
                        /*afterdoctypesystemkeywordloop:*/
                        // After DOCTYPE system keyword state: reads the character that
                        // follows the SYSTEM keyword and selects the next tokenizer state.
                        // continueStateloop / breakStateloop are labels declared outside
                        // this case section.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // Same target state as the other whitespace characters
                                        // below, but this path leaves through breakStateloop
                                        // instead of jumping straight into the next case.
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;

                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before DOCTYPE system
                                         * identifier state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
                                        // Jump to the label after the loop, which enters the
                                        // s63 case directly.
                                        goto breakAfterdoctypesystemkeywordloop;
                                    // FALL THROUGH continue stateloop
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Parse Error.
                                         */
                                        ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Parse Error.
                                         */
                                        ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                        /*
                                         * Set the DOCTYPE token's system identifier to
                                         * the empty string (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    case '>':
                                        /* U+003E GREATER-THAN SIGN (>) Parse error. */
                                        // The DOCTYPE ended right after the SYSTEM keyword, so
                                        // the missing item is the system identifier (the same
                                        // error the before-system-identifier state reports).
                                        ErrExpectedSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        // Any other character: report via BogusDoctype() and
                                        // continue in the bogus DOCTYPE state.
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Reached only from the whitespace branch above.
                        breakAfterdoctypesystemkeywordloop:
                            goto case TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s63_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
                        /*beforedoctypesystemidentifierloop:*/
                        // Before DOCTYPE system identifier state: skips whitespace, then
                        // starts a quoted system identifier, emits the DOCTYPE token on a
                        // premature '>', or switches to the bogus DOCTYPE state.
                        // continueStateloop / breakStateloop are labels declared outside
                        // this case section.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // State is left unchanged; only the state loop is exited.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before DOCTYPE system identifier
                                         * state.
                                         */
                                        continue;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                         * token's system identifier to the empty string
                                         * (not missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (double-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s64_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                         * system identifier to the empty string (not
                                         * missing),
                                         */
                                        ClearLongStrBuf();
                                        /*
                                         * then switch to the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        //state = Transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                        // Jump to the label after the loop, which enters the
                                        // s65 case directly.
                                        goto breakBeforedoctypesystemidentifierloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /* U+003E GREATER-THAN SIGN (>) Parse error. */
                                        ErrExpectedSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        // Any other character: report via BogusDoctype() and
                                        // continue in the bogus DOCTYPE state.
                                        BogusDoctype();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        // done by bogusDoctype();
                                        /*
                                         * Switch to the bogus DOCTYPE state.
                                         */
                                        //state = Transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        state = TokenizerState.s67_BOGUS_DOCTYPE;
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            // eof: ReadNext returned false, so leave the state loop with
                            // the state unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Reached only from the '\'' branch above.
                        breakBeforedoctypesystemidentifierloop:
                            goto case TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s65_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the after
                                         * DOCTYPE system identifier state.
                                         */
                                        systemIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s66_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                                        goto continueStateloop;
                                    case '>':
                                        ErrGtInSystemId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        systemIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * system identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE system identifier
                                         * (single-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                            // XXX reorder point

                        }
                    case TokenizerState.s59_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the after
                                         * DOCTYPE public identifier state.
                                         */
                                        publicIdentifier = LongStrBufToString();
                                        //state = Transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        state = TokenizerState.s60_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                                        goto continueStateloop;
                                    case '>':
                                        ErrGtInPublicId();
                                        /*
                                         * Set the DOCTYPE token's force-quirks flag to
                                         * on.
                                         */
                                        forceQuirks = true;
                                        /*
                                         * Emit that DOCTYPE token.
                                         */
                                        publicIdentifier = LongStrBufToString();
                                        EmitDoctypeToken();
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current DOCTYPE token's
                                         * public identifier.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the DOCTYPE public identifier
                                         * (single-quoted) state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.PROCESSING_INSTRUCTION:
                        //processinginstructionloop: 
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '?':
                                        //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,reconsume, pos);
                                        state = TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK;

                                        break;
                                    // continue stateloop;
                                    default:
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    //breakProcessingInstructionLoop: 
                    case TokenizerState.PROCESSING_INSTRUCTION_QUESTION_MARK:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                goto breakStateloop;

                            }

                            switch (c)
                            {
                                case '>':
                                    //state = Transition(state, Tokenizer.DATA,reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    continue;
                                default:
                                    //state = Transition(state,Tokenizer.PROCESSING_INSTRUCTION,reconsume, pos);
                                    state = TokenizerState.PROCESSING_INSTRUCTION;
                                    continue;
                            }

                        }
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

        breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }

Example #45

Show file

        /// <summary>
        /// Parses a list of command-line words into an instance of <c>T</c>.
        /// </summary>
        /// <param name="items">The command-line words to parse. The list is copied first, so the caller's list is not modified.</param>
        /// <param name="args">Parser settings. <c>ErrorMessage</c> is assigned on most (not all) failure paths.</param>
        /// <param name="to">Receives <c>state.Instance</c> on success; <c>null</c> when the method returns <c>false</c>.</param>
        /// <returns>
        /// <c>true</c> when every word was processed and the tokenizer state reports that it is complete;
        /// otherwise <c>false</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="items"/> or <paramref name="args"/> is <c>null</c>.</exception>
        internal static bool TryParse(IList<string> items, TokenizerArgs args, out T to)
        {
            if (items == null)
            {
                throw new ArgumentNullException("items");
            }
            else if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            // Work on a private copy: response files splice extra words into the list while it is walked.
            List<string> cArgs = new List<string>(items);

            using (TokenizerState<T> state = Tokenizer.NewState<T>(args))
            {
                TokenizerDefinition definition = state.Definition;
                to = null;

                bool atEnd = false;
                char[] checkChars = args.PlusMinSuffixArguments
                    ? new char[] { args.ArgumentValueSeparator, '+', '-' }
                    : new char[] { args.ArgumentValueSeparator };

                int nPlaced = 0;
                for (int i = 0; i < cArgs.Count; i++)
                {
                    string a = cArgs[i];

                    if (!atEnd && (a.Length > 1) && args.CommandLineChars.Contains(a[0]))
                    {
                        bool twoStart = a[0] == a[1];
                        if (a.Length == 2 && twoStart)
                        {
                            // A bare doubled command-line char (e.g. "--") ends named-argument processing.
                            if (!definition.HasPlacedArguments)
                            {
                                args.ErrorMessage = TokenizerMessages.NoPlacedArgumentsDefined;
                                return false;
                            }

                            atEnd = true;

                            // The marker is a delimiter only; without this it would fall through
                            // and be evaluated as the first placed argument.
                            continue;
                        }
                        else
                        {
                            int aFrom = twoStart ? 2 : 1;
                            int aTo = args.AllowDirectArgs ? a.IndexOfAny(checkChars, aFrom) : -1;

                            // Name part only: everything after the prefix, up to the first separator if one was found.
                            string item = (aTo > 0) ? a.Substring(aFrom, aTo - aFrom) : a.Substring(aFrom);

                            TokenItem token;

                            if (definition.TryGetToken(item, args.CaseSensitive, out token))
                            {
                                if (token.RequiresValue)
                                {
                                    if (i + 1 < cArgs.Count)
                                    {
                                        // The value is the next word; consume it together with the name.
                                        token.Evaluate(cArgs[++i], state);
                                    }
                                    else
                                    {
                                        args.ErrorMessage = TokenizerMessages.RequiredArgumentValueIsMissing;
                                        return false;
                                    }
                                }
                                else
                                {
                                    token.Evaluate(null, state);
                                }

                                continue;
                            }

                            // No exact match: try ever shorter prefixes as the name, with the remainder as a direct value.
                            for (int ii = item.Length - 1; ii > 0; ii--)
                            {
                                if (definition.TryGetToken(item.Substring(0, ii), args.CaseSensitive, out token) &&
                                    token.AllowDirectValue(item.Substring(ii), state))
                                {
                                    token.EvaluateDirect(item.Substring(ii), state);
                                    break;
                                }
                                else
                                {
                                    // A prefix may resolve to a token that rejects the direct value; discard it.
                                    token = null;
                                }
                            }

                            if (token == null)
                            {
                                args.ErrorMessage = string.Format(CultureInfo.InvariantCulture, TokenizerMessages.UnknownArgumentX, a);
                                return false;
                            }

                            if (token.RequiresValue)
                            {
                                if (i < cArgs.Count - 1)
                                {
                                    // NOTE(review): the following word is skipped without being handed to the token,
                                    // even though EvaluateDirect already ran above. Behaviour is kept as-is; confirm
                                    // whether the word should be evaluated or left in place instead.
                                    i++;
                                }
                                else
                                {
                                    args.ErrorMessage = string.Format(CultureInfo.InvariantCulture, TokenizerMessages.ValueExpectedForArgumentX, a);
                                    return false;
                                }
                            }

                            continue;
                        }
                    }
                    else if (!atEnd && args.AllowResponseFile && a.Length > 1 && a[0] == '@')
                    {
                        string file = a.Substring(1);

                        if (!File.Exists(file))
                        {
                            args.ErrorMessage = string.Format(CultureInfo.InvariantCulture, TokenizerMessages.ResponseFileXNotFound, file);
                            return false;
                        }

                        using (StreamReader sr = File.OpenText(file))
                        {
                            string line;
                            int n = i + 1; // insertion point: directly after the "@file" word, preserving file order

                            while (null != (line = sr.ReadLine()))
                            {
                                line = line.TrimStart();

                                // Lines of fewer than two characters and lines starting with '#' are ignored.
                                if (line.Length > 1 && line[0] != '#')
                                {
                                    foreach (string word in Tokenizer.GetCommandlineWords(line))
                                    {
                                        cArgs.Insert(n++, word);
                                    }
                                }
                            }
                        }

                        continue;
                    }
                    else if (!args.AllowNamedBetweenPlaced)
                    {
                        // First placed word seen: from here on nothing is treated as a named argument.
                        atEnd = true;
                    }

                    if (state.Definition.HasPlacedArguments)
                    {
                        if (nPlaced < state.Definition.PlacedItems.Count)
                        {
                            state.Definition.PlacedItems[nPlaced].Evaluate(cArgs[i], state);
                            nPlaced++;
                        }
                        else if (state.Definition.RestToken != null)
                        {
                            state.Definition.RestToken.Evaluate(cArgs[i], state);
                        }
                        else
                        {
                            args.ErrorMessage = string.Format(CultureInfo.InvariantCulture, TokenizerMessages.UnknownArgumentX, cArgs[i]);
                            return false;
                        }
                    }
                }

                if (!state.IsComplete)
                {
                    return false;
                }

                to = state.Instance;
                return true;
            }
        }

Example #46

Show file

File: TokenizerRule.cs Project: tathamoddie/RegexAnalyzer

 /// <summary>
 /// Creates a rule that applies to a single tokenizer state. The state is wrapped in a
 /// one-element array and construction is delegated to another overload of this constructor.
 /// </summary>
 /// <param name="state">The single state, forwarded as a one-element array.</param>
 /// <param name="applicableData">Forwarded unchanged.</param>
 /// <param name="type">Forwarded unchanged.</param>
 /// <param name="stateChange">Forwarded unchanged.</param>
 public TokenizerRule(
     TokenizerState state,
     IEnumerable<string> applicableData,
     TokenType type,
     Action<Stack<TokenizerState>> stateChange)
     : this(new TokenizerState[] { state }, applicableData, type, stateChange)
 {
 }

Example #47

Show file

File: Tokenizer.cs Project: antmicro/options-parser

        /// <summary>
        /// Reads the next token from the input.
        /// </summary>
        /// <returns>
        /// The next token, or <c>null</c> when the read position has reached the end of the input
        /// or when no string could be read for a positional argument in the normal state.
        /// </returns>
        public Token ReadNextToken()
        {
            if(position == input.Length)
            {
                return null;
            }

            var positionalOnly = state == TokenizerState.PositionalValues;
            if(!positionalOnly && input[position] == EscapeMarker)
            {
                // The string at the escape marker is read and thrown away;
                // from here on every token is positional.
                ReadNextString();
                state = TokenizerState.PositionalValues;
                positionalOnly = true;
            }

            if(positionalOnly)
            {
                // Order matters: the value is read first, the position is captured afterwards.
                var positionalValue = ReadNextString();
                return new PositionalArgumentToken(positionalValue, GetCurrentPosition());
            }

            var tokenStart = GetCurrentPosition();
            var current = ReadChar();

            if(state == TokenizerState.ShortName)
            {
                if(current != Tokenizer.EndOfString)
                {
                    return new ShortNameToken(current, tokenStart);
                }

                // The run of short names is over; go back to normal processing.
                state = TokenizerState.Normal;
                return ReadNextToken();
            }

            if(current != FlagCharacter)
            {
                var text = ReadNextString();
                if(text == null)
                {
                    return null;
                }

                return new PositionalArgumentToken(text, tokenStart);
            }

            if(PeekChar() == FlagCharacter)
            {
                // The second flag character was only peeked so far; consume it now.
                ReadChar();
                return new LongNameToken(ReadUntilChar(Tokenizer.EndOfString, AssignmentOperator), tokenStart);
            }

            // A single flag character introduces one or more short names.
            state = TokenizerState.ShortName;
            return ReadNextToken();
        }

Example #48

Show file

File: TokenizerRule.cs Project: tathamoddie/RegexAnalyzer

 /// <summary>
 /// Creates a rule for a single tokenizer state and a single data string. Each is wrapped in a
 /// one-element array and construction is delegated to another overload of this constructor.
 /// </summary>
 /// <param name="state">The single state, forwarded as a one-element array.</param>
 /// <param name="applicableData">The single data string, forwarded as a one-element array.</param>
 /// <param name="type">Forwarded unchanged.</param>
 /// <param name="stateChange">Forwarded unchanged.</param>
 public TokenizerRule(
     TokenizerState state,
     string applicableData,
     TokenType type,
     Action<Stack<TokenizerState>> stateChange)
     : this(new TokenizerState[] { state }, new string[] { applicableData }, type, stateChange)
 {
 }

Example #49

Show file

File: Tokenizer.StateLoop3_Comment.cs Project: prepare/WebParser

        void StateLoop3_Comment(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were an LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {

                    case TokenizerState.s45_MARKUP_DECLARATION_OPEN:
                        /*markupdeclarationopenloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * If the next two characters are both U+002D
                                 * HYPHEN-MINUS characters (-), consume those two
                                 * characters, create a comment token whose data is the
                                 * empty string, and switch to the comment start state.
                                 * 
                                 * Otherwise, if the next seven characters are an ASCII
                                 * case-insensitive match for the word "DOCTYPE", then
                                 * consume those characters and switch to the DOCTYPE
                                 * state.
                                 * 
                                 * Otherwise, if the insertion mode is
                                 * "in foreign content" and the current node is not an
                                 * element in the HTML namespace and the next seven
                                 * characters are a case-sensitive match for the string
                                 * "[CDATA[" (the five uppercase letters "CDATA" with a
                                 * U+005B LEFT SQUARE BRACKET character before and
                                 * after), then consume those characters and switch to
                                 * the CDATA section state.
                                 * 
                                 * Otherwise, it is a parse error. Switch to the bogus
                                 * comment state. The next character that is consumed,
                                 * if any, is the first character that will be in the
                                 * comment.
                                 */
                                switch (c)
                                {
                                    case '-':
                                        ClearLongStrBufAndAppend(c);
                                        //state = Transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
                                        state = TokenizerState.MARKUP_DECLARATION_HYPHEN;
                                        goto breakMarkupdeclarationopenloop;
                                    // goto continueStateloop;
                                    case 'd':
                                    case 'D':
                                        ClearLongStrBufAndAppend(c);
                                        index = 0;
                                        //state = Transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
                                        state = TokenizerState.MARKUP_DECLARATION_OCTYPE;
                                        goto continueStateloop;
                                    case '[':
                                        if (TokenListener.IsCDataSectionAllowed)
                                        {
                                            ClearLongStrBufAndAppend(c);
                                            index = 0;
                                            //state = Transition(state, Tokenizer.CDATA_START, reconsume, pos);
                                            state = TokenizerState.CDATA_START;
                                            goto continueStateloop;
                                        }
                                        else
                                        {
                                            // else fall through
                                            goto default;
                                        }
                                    default:
                                        ErrBogusComment();
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakMarkupdeclarationopenloop:
                            goto case TokenizerState.MARKUP_DECLARATION_HYPHEN;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.MARKUP_DECLARATION_HYPHEN:
                        /*markupdeclarationhyphenloop:*/
                        /*
                         * Markup declaration hyphen state: decides between a real
                         * comment and a bogus comment based on the next character.
                         * Every branch of the switch leaves the while loop, so at
                         * most one character is consumed here.
                         * NOTE(review): presumably entered after "<!-" has been
                         * seen; the preceding state is outside this view - confirm.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\u0000':
                                        // NUL: leave the state loop without changing state.
                                        goto breakStateloop;
                                    case '-':
                                        // Second hyphen: reset the long string buffer and
                                        // move on to the comment start state.
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.COMMENT_START, reconsume, pos);
                                        state = TokenizerState.s46_COMMENT_START;
                                        goto breakMarkupdeclarationhyphenloop;
                                    // goto continueStateloop;
                                    default:
                                        // Anything else: report the error and hand the
                                        // character to the bogus comment state. StepBack()
                                        // replaces the original "reconsume = true" flag.
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        //------------------------------------
                        breakMarkupdeclarationhyphenloop:
                            // C# has no implicit case fall-through, so the transition
                            // to the next case section is made explicit.
                            goto case TokenizerState.s46_COMMENT_START;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s46_COMMENT_START:
                        /*commentstartloop:*/
                        /*
                         * Comment start state: handles the first character after the
                         * comment opener. Every branch of the switch leaves the while
                         * loop, so at most one character is consumed here.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Switch to the comment
                                         * start dash state.
                                         */
                                        // The hyphen is buffered now; the states reached
                                        // from here do not append it again.
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
                                        state = TokenizerState.s47_COMMENT_START_DASH;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrPrematureEndOfComment();
                                        /* Emit the comment token. */
                                        // NOTE(review): the argument is presumably the number
                                        // of trailing buffered characters to drop - confirm
                                        // against EmitComment.
                                        EmitComment(0);
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;

                                        goto continueStateloop;
                                    case '\r':
                                        // CR is buffered via its dedicated helper and the
                                        // state loop is left (breakStateloop) with the state
                                        // already set to COMMENT for the next entry.
                                        AppendLongStrBufCarriageReturn();
                                        // state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;

                                        goto breakCommentstartloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD and then treated like
                                        // any other character.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the input character to
                                         * the comment token's data.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Switch to the comment state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;

                                        goto breakCommentstartloop;
                                    // goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        //------------------------------------
                        breakCommentstartloop:
                            // Explicit replacement for the fall-through into the
                            // comment state.
                            goto case TokenizerState.s48_COMMENT;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s48_COMMENT:
                        /*commentloop:*/
                        /*
                         * Comment state: accumulates comment text in the long string
                         * buffer. The loop keeps consuming characters until a hyphen
                         * (possible start of the terminator), a carriage return, or
                         * the end of the available input is reached.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Switch to the comment
                                         * end dash state
                                         */
                                        // The hyphen is buffered here, so the comment end
                                        // dash state does not need to append it again.
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                                        state = TokenizerState.s49_COMMENT_END_DASH;
                                        goto breakCommentloop;
                                    // goto continueStateloop;
                                    case '\r':
                                        // CR: buffer it and leave the state loop; the state
                                        // is unchanged, so processing resumes in COMMENT.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before being appended.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the input character to
                                         * the comment token's data.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the comment state.
                                         */
                                        continue;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        //------------------------------------
                        breakCommentloop:
                            // Explicit replacement for the fall-through into the
                            // comment end dash state.
                            goto case TokenizerState.s49_COMMENT_END_DASH;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s49_COMMENT_END_DASH:
                        /*commentenddashloop:*/
                        /*
                         * Comment end dash state: one hyphen has been seen inside the
                         * comment. That hyphen was already appended to the long string
                         * buffer by the state that switched here, so the branches below
                         * only append the current character. Every branch of the switch
                         * leaves the while loop.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '-':
                                        /*
                                         * U+002D HYPHEN-MINUS (-) Switch to the comment
                                         * end state
                                         */
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                                        state = TokenizerState.s50_COMMENT_END;
                                        goto breakCommentenddashloop;
                                    // goto continueStateloop;
                                    case '\r':
                                        // CR: buffer it, fall back to COMMENT and leave the
                                        // state loop.
                                        AppendLongStrBufCarriageReturn();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before being appended.
                                        c = '\uFFFD';
                                        goto default;
                                    // fall thru
                                    default:
                                        /*
                                         * Anything else Append a U+002D HYPHEN-MINUS
                                         * (-) character and the input character to the
                                         * comment token's data.
                                         */
                                        // Only the input character is appended here; the
                                        // hyphen is already in the buffer (see header).
                                        AppendLongStrBuf(c);
                                        /*
                                         * Switch to the comment state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        //------------------------------------
                        breakCommentenddashloop:
                            // Explicit replacement for the fall-through into the
                            // comment end state.
                            goto case TokenizerState.s50_COMMENT_END;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.s50_COMMENT_END:
                        /*commentendloop:*/
                        /*
                         * Comment end state: two hyphens have been seen inside the
                         * comment (both already buffered by the preceding states).
                         * '>' closes the comment; an extra '-' keeps the loop in this
                         * state; everything else returns to the comment state (or to
                         * the comment end bang state for '!').
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the comment
                                         * token.
                                         */
                                        // NOTE(review): 2 presumably tells EmitComment to
                                        // drop the two buffered trailing hyphens - confirm.
                                        EmitComment(2);
                                        /*
                                         * Switch to the data state.
                                         */
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '-':
                                        /* U+002D HYPHEN-MINUS (-) Parse error. */
                                        /*
                                         * Append a U+002D HYPHEN-MINUS (-) character to
                                         * the comment token's data.
                                         */
                                        AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
                                        /*
                                         * Stay in the comment end state.
                                         */
                                        continue;
                                    case '\r':
                                        // CR: buffer it through the double-hyphen-aware
                                        // helper, fall back to COMMENT and leave the state
                                        // loop.
                                        AdjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AdjustDoubleHyphenAndAppendToLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                    case '!':
                                        // "--!" : report the error, buffer the '!' and let
                                        // the comment end bang state decide what follows.
                                        ErrHyphenHyphenBang();
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
                                        state = TokenizerState.s51_COMMENT_END_BANG;
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before being appended.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Append two U+002D HYPHEN-MINUS (-) characters
                                         * and the input character to the comment
                                         * token's data.
                                         */
                                        // The two hyphens are already buffered; the helper
                                        // handles the double hyphen and appends c.
                                        AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
                                        /*
                                         * Switch to the comment state.
                                         */
                                        //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                        state = TokenizerState.s48_COMMENT;
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s51_COMMENT_END_BANG:
                        /*
                         * Comment end bang state: "--!" has been seen inside a comment
                         * and all three characters are already in the long string
                         * buffer (appended by the states that led here).
                         *
                         * Exactly one character is examined and every branch leaves
                         * this state:
                         *   '>'           emit the comment and return to data;
                         *   '-'           go to the comment end dash state;
                         *   anything else go back to the comment state.
                         *
                         * CR and LF belong to "anything else". They must switch to the
                         * comment state too; otherwise a '>' following "--!" plus a
                         * newline would terminate the comment, and EmitComment(3) would
                         * run against a buffer that no longer ends in "--!".
                         */
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //-------------------------------
                                //eof
                                goto breakStateloop;
                            }

                            switch (c)
                            {
                                case '>':
                                    /*
                                     * U+003E GREATER-THAN SIGN (>) Emit the comment
                                     * token.
                                     */
                                    // NOTE(review): 3 presumably tells EmitComment to drop
                                    // the buffered trailing "--!" - confirm.
                                    EmitComment(3);
                                    /*
                                     * Switch to the data state.
                                     */
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                case '-':
                                    /*
                                     * "--!" stays in the comment data (already
                                     * buffered); buffer this hyphen as the possible
                                     * start of a new terminator.
                                     */
                                    AppendLongStrBuf(c);
                                    /*
                                     * Switch to the comment end dash state.
                                     */
                                    state = TokenizerState.s49_COMMENT_END_DASH;
                                    goto continueStateloop;
                                case '\r':
                                    AppendLongStrBufCarriageReturn();
                                    // Same handling as the other comment sub-states:
                                    // fall back to COMMENT before leaving the state loop.
                                    state = TokenizerState.s48_COMMENT;
                                    goto breakStateloop;
                                case '\n':
                                    AppendLongStrBufLineFeed();
                                    // A line feed is ordinary comment text here.
                                    state = TokenizerState.s48_COMMENT;
                                    goto continueStateloop;
                                case '\u0000':
                                    // NUL is replaced by U+FFFD before being appended.
                                    c = '\uFFFD';
                                    // fall thru
                                    goto default;
                                default:
                                    /*
                                     * Anything else: "--!" is already buffered, so only
                                     * the input character is appended to the comment
                                     * token's data.
                                     */
                                    AppendLongStrBuf(c);
                                    /*
                                     * Switch to the comment state.
                                     */
                                    state = TokenizerState.s48_COMMENT;
                                    goto continueStateloop;
                            }
                        }
                    // XXX reorder point
                    case TokenizerState.s47_COMMENT_START_DASH:
                        /*
                         * Comment start dash state: the comment body began with a
                         * hyphen, which the comment start state already appended to
                         * the long string buffer. Exactly one character is examined
                         * and every branch leaves this state.
                         */
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //-------------------------------
                                //eof
                                goto breakStateloop;
                            }
                            //----------------------

                            switch (c)
                            {
                                case '-':
                                    /*
                                     * U+002D HYPHEN-MINUS (-) Switch to the comment end
                                     * state
                                     */
                                    AppendLongStrBuf(c);
                                    //state = Transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                                    state = TokenizerState.s50_COMMENT_END;
                                    goto continueStateloop;
                                case '>':
                                    ErrPrematureEndOfComment();
                                    /* Emit the comment token. */
                                    // NOTE(review): 1 presumably tells EmitComment to drop
                                    // the single buffered leading hyphen - confirm.
                                    EmitComment(1);
                                    /*
                                     * Switch to the data state.
                                     */
                                    //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                case '\r':
                                    // CR: buffer it, fall back to COMMENT and leave the
                                    // state loop.
                                    AppendLongStrBufCarriageReturn();
                                    //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.s48_COMMENT;
                                    goto breakStateloop;
                                case '\n':
                                    AppendLongStrBufLineFeed();
                                    //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.s48_COMMENT;
                                    goto continueStateloop;
                                case '\u0000':
                                    // NUL is replaced by U+FFFD before being appended.
                                    c = '\uFFFD';
                                    // fall thru
                                    goto default;
                                default:
                                    /*
                                     * Append a U+002D HYPHEN-MINUS character (-) and
                                     * the current input character to the comment
                                     * token's data.
                                     */
                                    // Only the input character is appended here; the
                                    // hyphen is already in the buffer (see header).
                                    AppendLongStrBuf(c);
                                    /*
                                     * Switch to the comment state.
                                     */
                                    //state = Transition(state, Tokenizer.COMMENT, reconsume, pos);
                                    state = TokenizerState.s48_COMMENT;
                                    goto continueStateloop;
                            }
                        }
                    // XXX reorder point
                    case TokenizerState.CDATA_START:
                        /*
                         * CDATA start state: matches successive input characters
                         * against Tokenizer.CDATA_LSQB, using `index` (declared outside
                         * this section) as the match position. Matched characters are
                         * appended to the long string buffer so they can become bogus
                         * comment text if the match fails.
                         * NOTE(review): CDATA_LSQB is presumably the "CDATA[" tail of
                         * "<![CDATA[" - confirm against its declaration.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                if (index < 6)
                                { // CDATA_LSQB.Length
                                    if (c == Tokenizer.CDATA_LSQB[index])
                                    {
                                        AppendLongStrBuf(c);
                                    }
                                    else
                                    {
                                        // Mismatch: treat what was read as a bogus comment
                                        // and let that state re-read this character.
                                        ErrBogusComment();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    index++;
                                    continue;
                                }
                                else
                                {
                                    // All six characters matched; the character just read
                                    // is the first one of the CDATA content. Start
                                    // collecting and step back so the CDATA section state
                                    // reads it again.
                                    reader.StartCollect(); // start coalescing
                                    //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                    state = TokenizerState.s68_CDATA_SECTION;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto case TokenizerState.s68_CDATA_SECTION;
                                    //break; // FALL THROUGH goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                            //------------------------------------

                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s68_CDATA_SECTION:
                        /*cdatasectionloop:*/
                        /*
                         * CDATA section state: consumes CDATA content. Ordinary
                         * characters (including LF) are simply skipped over here; they
                         * are not appended to any buffer in this section.
                         * NOTE(review): presumably the reader accumulates them after
                         * StartCollect() and FlushChars() emits the pending run -
                         * confirm against the reader implementation.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        // Possible start of "]]>": flush the pending
                                        // characters, then examine what follows the bracket.
                                        FlushChars();
                                        //state = Transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB;
                                        goto breakCdatasectionloop; // FALL THROUGH
                                    case '\u0000':
                                        // NUL is reported as a replacement character.
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        // CR: emit it and leave the state loop; the state
                                        // is unchanged, so processing resumes here.
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        continue;
                                }
                            }
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        //------------------------------------
                        breakCdatasectionloop:
                            // Explicit replacement for the fall-through into the
                            // CDATA_RSQB state.
                            goto case TokenizerState.CDATA_RSQB;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.CDATA_RSQB:
                        /*cdatarsqb:*/
                        /*
                         * CDATA_RSQB state: one ']' has been seen inside a CDATA
                         * section and is being held back (it has not been emitted).
                         * Every branch of the switch leaves the while loop, so at most
                         * one character is consumed here.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        // Second bracket: "]]" is pending, wait for '>'.
                                        //state = Transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
                                        state = TokenizerState.CDATA_RSQB_RSQB;
                                        goto breakCdatarsqb;
                                    default:
                                        // Not a terminator: the held-back bracket is plain
                                        // text. Emit the first character of RSQB_RSQB,
                                        // restart collecting and let the CDATA section
                                        // state re-read the current character.
                                        TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 1);
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                        state = TokenizerState.s68_CDATA_SECTION;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        //------------------------------------
                        breakCdatarsqb:
                            // Explicit replacement for the fall-through into the
                            // CDATA_RSQB_RSQB state.
                            goto case TokenizerState.CDATA_RSQB_RSQB;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.CDATA_RSQB_RSQB:
                        /*
                         * CDATA_RSQB_RSQB state: "]]" has been seen inside a CDATA
                         * section and both brackets are being held back.
                         *   ']'           the oldest held bracket is plain text: emit
                         *                 one ']' and stay here, still holding "]]";
                         *   '>'           "]]>" terminates the section, back to data;
                         *   anything else both brackets are plain text: emit "]]" and
                         *                 let the CDATA section state re-read the
                         *                 current character.
                         *
                         * The ']' branch is required so that input such as "]]]>"
                         * still terminates the section; routing the extra bracket
                         * through the default branch would restart the match and the
                         * following '>' would be treated as content.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case ']':
                                        // Emit a single bracket (first char of RSQB_RSQB)
                                        // and keep waiting for '>' with "]]" still pending.
                                        TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 1);
                                        continue;
                                    case '>':
                                        //cstart = pos + 1;
                                        // Collection restarts after the '>' so that none of
                                        // "]]>" ends up in the emitted text.
                                        reader.SkipOneAndStartCollect();
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        // Not a terminator: emit both held-back brackets,
                                        // restart collecting and step back so the CDATA
                                        // section state handles this character.
                                        TokenListener.Characters(Tokenizer.RSQB_RSQB, 0, 2);
                                        reader.StartCollect();
                                        state = TokenizerState.s68_CDATA_SECTION;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now;
                            // the state stays CDATA_RSQB_RSQB for the next entry.
                            goto breakStateloop;
                        }

                    // XXX reorder point
                    // BEGIN HOTSPOT WORKAROUND
                    case TokenizerState.s44_BOGUS_COMMENT:
                        /*boguscommentloop:*/
                        /*
                         * Bogus comment state: buffers everything up to the next '>'
                         * as comment data. A hyphen is buffered and then handed to the
                         * BOGUS_COMMENT_HYPHEN state; a carriage return leaves the
                         * state loop; all other characters keep the loop running.
                         */
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Consume every character up to and including the first
                                 * U+003E GREATER-THAN SIGN character (>) or the end of
                                 * the file (EOF), whichever comes first. Emit a comment
                                 * token whose data is the concatenation of all the
                                 * characters starting from and including the character
                                 * that caused the state machine to switch into the
                                 * bogus comment state, up to and including the
                                 * character immediately before the last consumed
                                 * character (i.e. up to the character just before the
                                 * U+003E or EOF character). (If the comment was started
                                 * by the end of the file (EOF), the token is empty.)
                                 * 
                                 * Switch to the data state.
                                 * 
                                 * If the end of the file was reached, reconsume the EOF
                                 * character.
                                 */
                                switch (c)
                                {
                                    case '>':
                                        // The '>' itself is never buffered, so nothing has
                                        // to be trimmed. NOTE(review): 0 is presumably the
                                        // number of trailing characters to drop - confirm.
                                        EmitComment(0);
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '-':
                                        // Hyphens get dedicated handling in the
                                        // BOGUS_COMMENT_HYPHEN state.
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
                                        state = TokenizerState.BOGUS_COMMENT_HYPHEN;
                                        goto breakBoguscommentloop;
                                    case '\r':
                                        // CR: buffer it and leave the state loop; the state
                                        // is unchanged, so processing resumes here.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before being appended.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        AppendLongStrBuf(c);
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: no more input available right now.
                            goto breakStateloop;
                        //------------------------------------
                        breakBoguscommentloop:
                            // Explicit replacement for the fall-through into the
                            // BOGUS_COMMENT_HYPHEN state.
                            goto case TokenizerState.BOGUS_COMMENT_HYPHEN;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.BOGUS_COMMENT_HYPHEN:
                        /*boguscommenthyphenloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '>':
                                        // [NOCPP[
                                        MaybeAppendSpaceToBogusComment();
                                        // ]NOCPP]
                                        EmitComment(0);
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    case '-':
                                        AppendSecondHyphenToBogusComment();
                                        goto continueBoguscommenthyphenloop;
                                    case '\r':
                                        AppendLongStrBufCarriageReturn();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        AppendLongStrBuf(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                }
                            //------------------------------------
                            continueBoguscommenthyphenloop:
                                continue;
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

       breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }

Example #50

Show file

File: JsonParser.cs Project: blakeohare/SimpleJson

            /// <summary>
            /// Splits JSON source text into a flat array of tokens: quoted strings (quotes included),
            /// unquoted words (numbers, booleans, nulls, bare identifiers) and single characters.
            /// </summary>
            /// <param name="text">The JSON text to tokenize.</param>
            /// <param name="allowComments">
            /// When true, <c>/* ... */</c> block comments are skipped. When false, a <c>/</c> is emitted
            /// as an ordinary token so that the parser can report it.
            /// </param>
            /// <returns>The tokens in source order, each tagged with its 1-based line and column.</returns>
            /// <exception cref="JsonParserException">
            /// Thrown when the end of the text is reached inside a comment or a string.
            /// </exception>
            private static Token[] Init(string text, bool allowComments)
            {
                List <Token> tokens = new List <Token>();

                // Sentinel newline: it guarantees that a trailing word is terminated inside the loop,
                // so any state other than NONE after the loop means a comment or string was left open.
                text += "\n";

                int            length     = text.Length;
                TokenizerState state      = TokenizerState.NONE;
                int            tokenStart = 0;

                // Precompute the 1-based line and column of every character so that a token can be
                // tagged with the position of its first character regardless of where it ends.
                int[] lines   = new int[length];
                int[] columns = new int[length];
                int   line    = 1;
                int   column  = 1;

                for (int i = 0; i < length; ++i)
                {
                    lines[i]   = line;
                    columns[i] = column++;
                    if (text[i] == '\n')
                    {
                        line++;
                        column = 1;
                    }
                }

                // Quote character (' or ") that opened the string currently being scanned.
                char stringType = '\0';
                char c;

                for (int i = 0; i < length; ++i)
                {
                    c = text[i];
                    switch (state)
                    {
                    case TokenizerState.NONE:
                        switch (c)
                        {
                        case ' ':
                        case '\r':
                        case '\n':
                        case '\t':
                            // skip whitespace
                            break;

                        case '"':
                        case '\'':
                            stringType = c;
                            tokenStart = i;
                            state      = TokenizerState.STRING;
                            break;

                        case '/':
                            if (allowComments && i + 1 < length && text[i + 1] == '*')
                            {
                                state = TokenizerState.COMMENT;
                                i++;         // step over the '*' so that "/*/" is not a self-closing comment.
                            }
                            else
                            {
                                // Go ahead and add as a token and let the parser throw the exception.
                                tokens.Add(new Token("/", lines[i], columns[i]));
                            }
                            break;

                        default:
                            // numbers, unquoted strings, nulls, booleans
                            if (IsWordCharacter(c))
                            {
                                state      = TokenizerState.WORD;
                                tokenStart = i;
                            }
                            else
                            {
                                // Either a JSON syntax character (like a bracket, comma, or colon) or an error.
                                tokens.Add(new Token(c.ToString(), lines[i], columns[i]));
                            }
                            break;
                        }
                        break;

                    case TokenizerState.COMMENT:
                        if (c == '*' && i + 1 < length && text[i + 1] == '/')
                        {
                            ++i;     // skip the slash
                            state = TokenizerState.NONE;
                        }
                        break;

                    case TokenizerState.STRING:
                        if (c == stringType)
                        {
                            // The token text keeps both the opening and the closing quote.
                            tokens.Add(new Token(text.Substring(tokenStart, i - tokenStart + 1), lines[tokenStart], columns[tokenStart]));
                            state = TokenizerState.NONE;
                        }
                        else if (c == '\\')
                        {
                            // Step over the escaped character so that an escaped quote does not close the string.
                            i++;
                        }
                        break;

                    case TokenizerState.WORD:
                        if (!IsWordCharacter(c))
                        {
                            tokens.Add(new Token(text.Substring(tokenStart, i - tokenStart), lines[tokenStart], columns[tokenStart]));
                            --i;     // re-examine the terminating character in the NONE state
                            state = TokenizerState.NONE;
                        }
                        break;
                    }
                }

                if (state != TokenizerState.NONE)
                {
                    if (state == TokenizerState.COMMENT)
                    {
                        throw new JsonParserException("Unexpected EOF detected. A comment seems to be left unclosed.");
                    }
                    throw new JsonParserException("Unexpected EOF detected. A string seems to be left unclosed.");
                }

                return(tokens.ToArray());
            }

            /// <summary>
            /// Tells whether a character may start or continue an unquoted word: an ASCII letter or
            /// digit, underscore, period or hyphen. Both the NONE and the WORD states use this one
            /// test, so the start and continuation rules cannot drift apart.
            /// </summary>
            private static bool IsWordCharacter(char c)
            {
                return (c >= '0' && c <= '9') ||
                       (c >= 'a' && c <= 'z') ||
                       (c >= 'A' && c <= 'Z') ||
                       c == '_' ||
                       c == '.' ||
                       c == '-';
            }

Example #51

Show file

File: Tokenizer.cs Project: fireflycons/Invoke-SqlExecute

        /// <summary>
        /// Scans forward from the current position and returns the next token.
        /// </summary>
        /// <remarks>
        /// Scanning stops when a comment begins after the start of the line, when a block comment
        /// closes, or when the characters run out. The token is reported as a comment if the
        /// tokenizer was already inside a line or block comment when this method was called.
        /// NOTE(review): <c>Consume</c> presumably advances the position and appends the consumed
        /// characters to the value builder; confirm against its definition.
        /// </remarks>
        /// <returns>
        /// A <see cref="Token" />, or <c>null</c> when no text was collected (end of string).
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// A block comment terminator was found while not inside a block comment.
        /// </exception>
        public Token GetNextToken()
        {
            this.valueBuilder.Clear();
            var stateOnEntry = this.State;

            // Set once the text gathered so far should be handed back to the caller.
            var tokenReady = false;

            while (!tokenReady && this.position < this.lineChars.Length)
            {
                // One-character and (up to) two-character look-ahead at the current position.
                var current = new string(this.lineChars[this.position], 1);
                var pair = new string(
                    this.lineChars.Skip(this.position).Take(Math.Min(2, this.lineChars.Length - this.position))
                    .ToArray());

                switch (this.State)
                {
                    case TokenizerState.None:

                        if (current == SingleQuote)
                        {
                            // Opening quote of a single-quoted literal
                            this.State = TokenizerState.SingleQuoteString;
                            this.Consume();
                        }
                        else if (current == DoubleQuote)
                        {
                            // Opening quote of a double-quoted literal
                            this.State = TokenizerState.DoubleQuoteString;
                            this.Consume();
                        }
                        else if (pair == LineComment)
                        {
                            this.State = TokenizerState.LineComment;

                            // At the start of the line there is no preceding text to emit,
                            // so keep scanning straight into the comment.
                            tokenReady = this.position != 0;
                        }
                        else if (pair == BlockCommentStart)
                        {
                            this.State = TokenizerState.BlockComment;

                            // Same rule as for line comments.
                            tokenReady = this.position != 0;
                        }
                        else if (pair == BlockCommentEnd)
                        {
                            throw new InvalidOperationException("Found BlockCommentEnd when not within a block comment");
                        }
                        else
                        {
                            this.Consume();
                        }

                        break;

                    case TokenizerState.BlockComment:

                        if (pair == BlockCommentEnd)
                        {
                            // Take the terminator and emit the comment
                            this.State = TokenizerState.None;
                            this.Consume(2);
                            tokenReady = true;
                        }
                        else
                        {
                            this.Consume();
                        }

                        break;

                    case TokenizerState.LineComment:

                        // A line comment swallows everything up to the end of the string
                        this.Consume();
                        break;

                    case TokenizerState.SingleQuoteString:

                        if (pair == QuoteQuote)
                        {
                            // A doubled quote inside the literal belongs to the literal
                            this.Consume(2);
                        }
                        else
                        {
                            if (current == SingleQuote)
                            {
                                // Closing quote
                                this.State = TokenizerState.None;
                            }

                            this.Consume();
                        }

                        break;

                    case TokenizerState.DoubleQuoteString:

                        if (current == DoubleQuote)
                        {
                            // Closing quote
                            this.State = TokenizerState.None;
                        }

                        this.Consume();
                        break;

                    default:

                        // Any other state: stop scanning and emit what has been gathered
                        tokenReady = true;
                        break;
                }
            }

            var text = this.valueBuilder.ToString();

            if (text.Length == 0)
            {
                // Nothing gathered: end of line, which also terminates any line comment
                if (this.State == TokenizerState.LineComment)
                {
                    this.State = TokenizerState.None;
                }

                return(null);
            }

            var startedInComment = stateOnEntry == TokenizerState.BlockComment
                                   || stateOnEntry == TokenizerState.LineComment;

            return(new Token(startedInComment ? TokenType.Comment : TokenType.Text, text));
        }

Example #52

Show file

File: Tokenizer.StateLoop3_TagAndAttrs.cs Project: prepare/WebParser

        void StateLoop3_Tag(TokenizerState state, TokenizerState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were an LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */

            /*stateloop:*/
            for (; ; )
            {


                //*************
            continueStateloop:
                //*************

                switch (state)
                {
                    case TokenizerState.s01_DATA:
                        /*dataloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in data state.
                                         */
                                        FlushChars();
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('\u0000');
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

                                        goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the tag
                                         * open state.
                                         */
                                        FlushChars();

                                        //state = Transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
                                        state = TokenizerState.s08_TAG_OPEN;
                                        goto breakDataloop; // FALL THROUGH continue
                                    // stateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the input character as a
                                         * character token.
                                         * 
                                         * Stay in the data state.
                                         */
                                        continue;
                                }
                            }


                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------
                        breakDataloop:
                            goto case TokenizerState.s08_TAG_OPEN;
                            //------------      
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case TokenizerState.s08_TAG_OPEN:
                        /*tagopenloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * The behavior of this state depends on the content
                                 * model flag.
                                 */

                                /*
                                 * If the content model flag is set to the PCDATA state
                                 * Consume the next input character:
                                 */
                                if (c >= 'A' && c <= 'Z')
                                {
                                    /*
                                     * U+0041 LATIN CAPITAL LETTER A through to U+005A
                                     * LATIN CAPITAL LETTER Z Create a new start tag
                                     * token,
                                     */
                                    endTag = false;
                                    /*
                                     * set its tag name to the lowercase version of the
                                     * input character (add 0x0020 to the character's
                                     * code point),
                                     */
                                    ClearStrBufAndAppend((char)(c + 0x20));
                                    /* then switch to the tag name state. */
                                    //state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                    state = TokenizerState.s10_TAG_NAME;
                                    /*
                                     * (Don't emit the token yet; further details will
                                     * be filled in before it is emitted.)
                                     */
                                    goto breakTagopenloop;
                                    // goto continueStateloop;
                                }
                                else if (c >= 'a' && c <= 'z')
                                {
                                    /*
                                     * U+0061 LATIN SMALL LETTER A through to U+007A
                                     * LATIN SMALL LETTER Z Create a new start tag
                                     * token,
                                     */
                                    endTag = false;
                                    /*
                                     * set its tag name to the input character,
                                     */
                                    ClearStrBufAndAppend(c);
                                    /* then switch to the tag name state. */
                                    //state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                    state = TokenizerState.s10_TAG_NAME;
                                    /*
                                     * (Don't emit the token yet; further details will
                                     * be filled in before it is emitted.)
                                     */
                                    goto breakTagopenloop;
                                    // goto continueStateloop;
                                }
                                switch (c)
                                {
                                    case '!':
                                        /*
                                         * U+0021 EXCLAMATION MARK (!) Switch to the
                                         * markup declaration open state.
                                         */
                                        //state = Transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
                                        state = TokenizerState.s45_MARKUP_DECLARATION_OPEN;
                                        goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the close tag
                                         * open state.
                                         */
                                        //state = Transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
                                        state = TokenizerState.s09_CLOSE_TAG_OPEN;
                                        goto continueStateloop;
                                    case '?':
                                        /*
                                         * U+003F QUESTION MARK (?) Parse error.
                                         */
                                        ErrProcessingInstruction();
                                        /*
                                         * Switch to the bogus comment state.
                                         */
                                        ClearLongStrBufAndAppend(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrLtGt();
                                        /*
                                         * Emit a U+003C LESS-THAN SIGN character token
                                         * and a U+003E GREATER-THAN SIGN character
                                         * token.
                                         */
                                        TokenListener.Characters(LT_GT, 0, 2);
                                        /* Switch to the data state. */
                                        //cstart = pos + 1;
                                        reader.SkipOneAndStartCollect();
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Parse error.
                                         */
                                        ErrBadCharAfterLt(c);
                                        /*
                                         * Emit a U+003C LESS-THAN SIGN character token
                                         */
                                        TokenListener.Characters(LT_GT, 0, 1);
                                        /*
                                         * and reconsume the current input character in
                                         * the data state.
                                         */
                                        reader.StartCollect();
                                        //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                        state = TokenizerState.s01_DATA;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakTagopenloop:
                            goto case TokenizerState.s10_TAG_NAME;
                        }
                    //  FALL THROUGH DON'T REORDER
                    case TokenizerState.s10_TAG_NAME:
                        /*tagnameloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        SilentCarriageReturn();
                                        StrBufToElementNameString();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before attribute name state.
                                         */
                                        StrBufToElementNameString();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakTagnameloop;
                                    // goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        StrBufToElementNameString();
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        StrBufToElementNameString();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        c = '\uFFFD';
                                        goto default;
                                    // fall thru
                                    default:
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Append the
                                             * lowercase version of the current input
                                             * character (add 0x0020 to the character's
                                             * code point) to the current tag token's
                                             * tag name.
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Anything else Append the current input
                                         * character to the current tag token's tag
                                         * name.
                                         */
                                        AppendStrBuf(c);
                                        /*
                                         * Stay in the tag name state.
                                         */
                                        continue;
                                }
                            }

                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        breakTagnameloop:
                            goto case TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    // Before attribute name state: skips whitespace, closes the tag on '/' or '>',
                    // and otherwise starts a new attribute name with the character just read and
                    // moves on to the attribute name state.
                    case TokenizerState.s34_BEFORE_ATTRIBUTE_NAME:
                        /*beforeattributenameloop:*/
                        {
                            char c;
                            // The loop ends when ReadNext returns false; control then reaches the
                            // "eof" jump below the loop.
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // Carriage return: call SilentCarriageReturn() and leave the
                                        // state loop with `state` unchanged.
                                        // NOTE(review): presumably the caller resumes in this state so
                                        // that a following LF is handled there -- confirm.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before attribute name state.
                                         */
                                        continue;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;

                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced with U+FFFD and then reported and handled
                                        // like the error characters below.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto case '\"';
                                    case '\"':
                                    case '\'':
                                    case '<':
                                    case '=':
                                        /*
                                         * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                         * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                         * SIGN (=) Parse error.
                                         */
                                        ErrBadCharBeforeAttributeNameOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Start a new attribute in the
                                         * current tag token.
                                         */
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Set that
                                             * attribute's name to the lowercase version
                                             * of the current input character (add
                                             * 0x0020 to the character's code point)
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Set that attribute's name to the current
                                         * input character,
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * and its value to the empty string.
                                         */
                                        // Will do later.
                                        /*
                                         * Switch to the attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s35_ATTRIBUTE_NAME;
                                        // Jump to the label after the loop: a `goto case` written here
                                        // would bind to the inner switch (c), not the state switch.
                                        goto breakBeforeattributenameloop;
                                    // goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop with `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit transfer into the attribute name state (C# has no implicit
                        // fall-through between switch sections).
                        breakBeforeattributenameloop:
                            goto case TokenizerState.s35_ATTRIBUTE_NAME;
                        }
                    // FALLTHRU DON'T REORDER
                    // Attribute name state: appends characters (ASCII upper case folded to lower
                    // case) to the string buffer until whitespace, '/', '=' or '>' ends the name.
                    case TokenizerState.s35_ATTRIBUTE_NAME:
                        /*attributenameloop:*/
                        {
                            char c;
                            // The loop ends when ReadNext returns false; control then reaches the
                            // "eof" jump below the loop.
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\r':
                                        // Carriage return ends the name like other whitespace, but the
                                        // state loop is left (rather than continued) after the
                                        // transition to the after attribute name state is recorded.
                                        SilentCarriageReturn();
                                        AttributeNameComplete();
                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s36_AFTER_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the after attribute name state.
                                         */
                                        AttributeNameComplete();
                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s36_AFTER_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        // The attribute has no value, so it is added immediately.
                                        AttributeNameComplete();
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        goto continueStateloop;
                                    case '=':
                                        /*
                                         * U+003D EQUALS SIGN (=) Switch to the before
                                         * attribute value state.
                                         */
                                        AttributeNameComplete();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
                                        state = TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE;
                                        // Jump to the label after the loop: a `goto case` written here
                                        // would bind to the inner switch (c), not the state switch.
                                        goto breakAttributenameloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        AttributeNameComplete();
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced with U+FFFD and then reported and handled
                                        // like the error characters below.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto case '\"';
                                    case '\"':
                                    case '\'':
                                    case '<':
                                        /*
                                         * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                         * (') U+003C LESS-THAN SIGN (<) Parse error.
                                         */
                                        ErrQuoteOrLtInAttributeNameOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Append the
                                             * lowercase version of the current input
                                             * character (add 0x0020 to the character's
                                             * code point) to the current attribute's
                                             * name.
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's name.
                                         */
                                        AppendStrBuf(c);
                                        /*
                                         * Stay in the attribute name state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop with `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit transfer into the before attribute value state (C# has no
                        // implicit fall-through between switch sections).
                        breakAttributenameloop:
                            goto case TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE;
                        }
                    // FALLTHRU DON'T REORDER
                    // Before attribute value state: skips whitespace after '=', then selects the
                    // double-quoted, single-quoted or unquoted attribute value state depending on
                    // the first significant character ('>' emits the tag with a valueless attribute).
                    case TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE:
                        /*beforeattributevalueloop:*/
                        {
                            char c;
                            // The loop ends when ReadNext returns false; control then reaches the
                            // "eof" jump below the loop.
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // Carriage return: call SilentCarriageReturn() and leave the
                                        // state loop with `state` unchanged.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the before attribute value state.
                                         */
                                        continue;
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Switch to the
                                         * attribute value (double-quoted) state.
                                         */
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s38_ATTRIBUTE_VALUE_DOUBLE_QUOTED;

                                        // Jump to the label after the loop: a `goto case` written here
                                        // would bind to the inner switch (c), not the state switch.
                                        goto breakBeforeattributevalueloop;
                                    // goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the attribute
                                         * value (unquoted) state and reconsume this
                                         * input character.
                                         */
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
                                        state = TokenizerState.s40_ATTRIBUTE_VALUE_UNQUOTED;
                                        NoteUnquotedAttributeValue();
                                        //reconsume = true;
                                        // StepBack() takes the place of the old `reconsume` flag.
                                        // NOTE(review): presumably it makes the next ReadNext return
                                        // this '&' again -- confirm against the reader implementation.
                                        reader.StepBack();
                                        goto continueStateloop;
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the attribute
                                         * value (single-quoted) state.
                                         */
                                        ClearLongStrBuf();
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s39_ATTRIBUTE_VALUE_SINGLE_QUOTED;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Parse error.
                                         */
                                        ErrAttributeValueMissing();
                                        /*
                                         * Emit the current tag token.
                                         */
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced with U+FFFD and then reported and handled
                                        // like the error characters below.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto case '<';
                                    case '<':
                                    case '=':
                                    case '`':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
                                         * (=) U+0060 GRAVE ACCENT (`)
                                         */
                                        ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        // [NOCPP[
                                        ErrHtml4NonNameInUnquotedAttribute(c);
                                        // ]NOCPP]
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        ClearLongStrBufAndAppend(c);
                                        /*
                                         * Switch to the attribute value (unquoted)
                                         * state.
                                         */

                                        //state = Transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
                                        state = TokenizerState.s40_ATTRIBUTE_VALUE_UNQUOTED;

                                        NoteUnquotedAttributeValue();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop with `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit transfer into the double-quoted attribute value state (C# has
                        // no implicit fall-through between switch sections).
                        breakBeforeattributevalueloop:
                            goto case TokenizerState.s38_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
                        }

                    // FALLTHRU DON'T REORDER
                    // Attribute value (double-quoted) state: accumulates characters into the long
                    // string buffer until the closing '"'; '&' diverts to character reference
                    // handling with this state recorded as the return state.
                    case TokenizerState.s38_ATTRIBUTE_VALUE_DOUBLE_QUOTED:
                        /*attributevaluedoublequotedloop:*/
                        {
                            char c;
                            // The loop ends when ReadNext returns false; control then reaches the
                            // "eof" jump below the loop.
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '"':
                                        /*
                                         * U+0022 QUOTATION MARK (") Switch to the after
                                         * attribute value (quoted) state.
                                         */
                                        AddAttributeWithValue();

                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED;
                                        // Jump to the label after the loop: a `goto case` written here
                                        // would bind to the inner switch (c), not the state switch.
                                        goto breakAttributevaluedoublequotedloop;
                                    // goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in attribute value state, with the
                                         * additional allowed character being U+0022
                                         * QUOTATION MARK (").
                                         */
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('\"');
                                        // Save the current state in returnState before switching.
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;

                                        goto continueStateloop;
                                    case '\r':
                                        // Carriage return inside the value: recorded via
                                        // AppendLongStrBufCarriageReturn(), then the state loop is left
                                        // with `state` unchanged.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        // Line feed inside the value: recorded via
                                        // AppendLongStrBufLineFeed(); stay in this state.
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // NUL is replaced with U+FFFD and appended like any other
                                        // character.
                                        c = '\uFFFD';
                                        // fall thru
                                        goto default;
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the attribute value (double-quoted)
                                         * state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop with `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit transfer into the after attribute value (quoted) state (C# has
                        // no implicit fall-through between switch sections).
                        breakAttributevaluedoublequotedloop:
                            goto case TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED;
                        }
                    // FALLTHRU DON'T REORDER
                    // After attribute value (quoted) state: decides what follows a closing quote.
                    // Every branch of the inner switch leaves the loop, so at most one character
                    // is read per entry into this state.
                    case TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED:
                        /*afterattributevaluequotedloop:*/
                        {
                            char c;
                            // If ReadNext returns false, control reaches the "eof" jump below.
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // Carriage return acts like other whitespace, but the state
                                        // loop is left (rather than continued) after the transition to
                                        // the before attribute name state is recorded.
                                        SilentCarriageReturn();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        // Jump to the label after the loop: a `goto case` written here
                                        // would bind to the inner switch (c), not the state switch.
                                        goto breakAfterattributevaluequotedloop;
                                    // goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        // The next state is whatever EmitCurrentTagToken returns.
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    default:
                                        /*
                                         * Anything else Parse error.
                                         */
                                        ErrNoSpaceBetweenAttributes();
                                        /*
                                         * Reconsume the character in the before
                                         * attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        //reconsume = true;
                                        // StepBack() takes the place of the old `reconsume` flag.
                                        // NOTE(review): presumably it makes the next ReadNext return
                                        // this character again -- confirm against the reader.
                                        reader.StepBack();
                                        goto continueStateloop;
                                }
                            }
                            //------------------------------------
                            //eof
                            // ReadNext returned false: leave the state loop with `state` unchanged.
                            goto breakStateloop;
                        //------------------------------------
                        // Explicit transfer into the self-closing start tag state (C# has no
                        // implicit fall-through between switch sections).
                        breakAfterattributevaluequotedloop:
                            goto case TokenizerState.s43_SELF_CLOSING_START_TAG;
                        }
                    // FALLTHRU DON'T REORDER
                    // Self-closing start tag state: reads exactly one character after a '/'.
                    //   '>'           -> emit the current tag token with the self-closing flag set.
                    //   anything else -> parse error; the character is handed back to the reader
                    //                    and the before attribute name state takes over.
                    case TokenizerState.s43_SELF_CLOSING_START_TAG:
                        {
                            char following;
                            if (reader.ReadNext(out following))
                            {
                                if (following == '>')
                                {
                                    // [NOCPP[
                                    ErrHtml4XmlVoidSyntax();
                                    // ]NOCPP]

                                    // The next state is whatever EmitCurrentTagToken returns.
                                    state = EmitCurrentTagToken(true);
                                    if (shouldSuspend)
                                    {
                                        goto breakStateloop;
                                    }

                                    goto continueStateloop;
                                }

                                // Not '>': report the stray solidus, then reconsume the character
                                // in the before attribute name state.
                                ErrSlashNotFollowedByGt();
                                state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                reader.StepBack();
                                goto continueStateloop;
                            }

                            // No character was read: leave the state loop with `state` unchanged.
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s40_ATTRIBUTE_VALUE_UNQUOTED:
                        // Attribute value (unquoted) state. Reads characters until one of them
                        // ends the value: whitespace and '>' complete the attribute with
                        // AddAttributeWithValue(), '&' hands over to the character reference
                        // state (remembering this state in returnState), and every other
                        // character is appended to the value buffer.
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                switch (c)
                                {
                                    case '\r':
                                        // Carriage return ends the value like other whitespace,
                                        // but leaves the state loop instead of continuing it.
                                        SilentCarriageReturn();
                                        AddAttributeWithValue();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE
                                         * Switch to the before attribute name state.
                                         */
                                        AddAttributeWithValue();
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s34_BEFORE_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in attribute value state, with the
                                         * additional allowed character being U+003E
                                         * GREATER-THAN SIGN (>)
                                         */
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('>');
                                        // Remember this state so the character reference states
                                        // can come back to it.
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        AddAttributeWithValue();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD and then treated like the
                                        // error characters below (explicit jump: C# has no
                                        // implicit fall-through).
                                        c = '\uFFFD';
                                        goto case '<';
                                    // fall thru
                                    case '<':
                                    case '\"':
                                    case '\'':
                                    case '=':
                                    case '`':
                                        /*
                                         * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                         * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                         * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
                                         */
                                        ErrUnquotedAttributeValOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        // fall through
                                        goto default;
                                    default:
                                        // [NOCPP[
                                        ErrHtml4NonNameInUnquotedAttribute(c);
                                        // ]NOCPP]
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the attribute value (unquoted) state.
                                         */
                                        continue;
                                }
                            }
                            //-------------------------------
                            // ReadNext returned false: leave the state loop.
                            goto breakStateloop;
                        }
                    // XXX reorder point
                    case TokenizerState.s36_AFTER_ATTRIBUTE_NAME:
                        // After attribute name state. Skips whitespace, then decides what follows
                        // the attribute name: '/' (self-closing start tag), '=' (a value follows),
                        // '>' (end of tag) or the first character of a new attribute name. Every
                        // branch except '=' and whitespace first records the pending attribute
                        // with AddAttributeWithoutValue().
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '\r':
                                        // Carriage return: stay in this state but leave the
                                        // state loop.
                                        SilentCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                    case ' ':
                                    case '\t':
                                    case '\u000C':
                                        /*
                                         * U+0009 CHARACTER TABULATION U+000A LINE FEED
                                         * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
                                         * in the after attribute name state.
                                         */
                                        continue;
                                    case '/':
                                        /*
                                         * U+002F SOLIDUS (/) Switch to the self-closing
                                         * start tag state.
                                         */
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
                                        state = TokenizerState.s43_SELF_CLOSING_START_TAG;
                                        goto continueStateloop;
                                    case '=':
                                        /*
                                         * U+003D EQUALS SIGN (=) Switch to the before
                                         * attribute value state.
                                         */
                                        //state = Transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
                                        state = TokenizerState.s37_BEFORE_ATTRIBUTE_VALUE;
                                        goto continueStateloop;
                                    case '>':
                                        /*
                                         * U+003E GREATER-THAN SIGN (>) Emit the current
                                         * tag token.
                                         */
                                        AddAttributeWithoutValue();
                                        //state = Transition(state, EmitCurrentTagToken(false, pos), reconsume, pos);
                                        state = EmitCurrentTagToken(false);
                                        if (shouldSuspend)
                                        {
                                            goto breakStateloop;
                                        }
                                        /*
                                         * Switch to the data state.
                                         */
                                        goto continueStateloop;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD and then reported like a
                                        // quote or '<' (explicit jump: C# has no implicit
                                        // fall-through).
                                        c = '\uFFFD';
                                        goto case '\"';
                                    // fall thru
                                    case '\"':
                                    case '\'':
                                    case '<':
                                        ErrQuoteOrLtInAttributeNameOrNull(c);
                                        /*
                                         * Treat it as per the "anything else" entry
                                         * below.
                                         */
                                        goto default;
                                    default:
                                        AddAttributeWithoutValue();
                                        /*
                                         * Anything else Start a new attribute in the
                                         * current tag token.
                                         */
                                        if (c >= 'A' && c <= 'Z')
                                        {
                                            /*
                                             * U+0041 LATIN CAPITAL LETTER A through to
                                             * U+005A LATIN CAPITAL LETTER Z Set that
                                             * attribute's name to the lowercase version
                                             * of the current input character (add
                                             * 0x0020 to the character's code point)
                                             */
                                            c += (char)0x20;
                                        }
                                        /*
                                         * Set that attribute's name to the current
                                         * input character,
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * and its value to the empty string.
                                         */
                                        // Will do later.
                                        /*
                                         * Switch to the attribute name state.
                                         */
                                        //state = Transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
                                        state = TokenizerState.s35_ATTRIBUTE_NAME;
                                        goto continueStateloop;
                                }
                            }
                            //-------------------------------
                            //eof
                            goto breakStateloop;
                        }

                    // XXX reorder point
                    case TokenizerState.s39_ATTRIBUTE_VALUE_SINGLE_QUOTED:
                        // Attribute value (single-quoted) state. Appends characters to the value
                        // buffer until the closing apostrophe completes the attribute. An '&'
                        // jumps (via the label at the end of this section) straight into the
                        // CONSUME_CHARACTER_REFERENCE section, with this state saved in
                        // returnState.
                        /*attributevaluesinglequotedloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                /*
                                 * Consume the next input character:
                                 */
                                switch (c)
                                {
                                    case '\'':
                                        /*
                                         * U+0027 APOSTROPHE (') Switch to the after
                                         * attribute value (quoted) state.
                                         */
                                        AddAttributeWithValue();

                                        //state = Transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
                                        state = TokenizerState.s42__AFTER_ATTRIBUTE_VALUE_QUOTED;
                                        goto continueStateloop;
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in attribute value state, with the
                                         * additional allowed character being U+0027
                                         * APOSTROPHE (').
                                         */
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('\'');
                                        // Remember this state so the character reference states
                                        // can come back to it.
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                        state = TokenizerState.CONSUME_CHARACTER_REFERENCE;
                                        goto breakAttributevaluesinglequotedloop;
                                    // goto continueStateloop;
                                    case '\r':
                                        // Carriage return is appended through its dedicated
                                        // helper, then the state loop is left.
                                        AppendLongStrBufCarriageReturn();
                                        goto breakStateloop;
                                    case '\n':
                                        AppendLongStrBufLineFeed();
                                        continue;
                                    case '\u0000':
                                        // NUL is replaced by U+FFFD before being appended.
                                        c = '\uFFFD';
                                        goto default;
                                    // fall thru
                                    default:
                                        /*
                                         * Anything else Append the current input
                                         * character to the current attribute's value.
                                         */
                                        AppendLongStrBuf(c);
                                        /*
                                         * Stay in the attribute value (single-quoted)
                                         * state.
                                         */
                                        continue;
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //------------------------------------
                        // Reached only from the '&' branch above: emulates the fall-through
                        // into the next section, which C# does not allow implicitly.
                        breakAttributevaluesinglequotedloop:
                            goto case TokenizerState.CONSUME_CHARACTER_REFERENCE;
                        }
                    // FALLTHRU DON'T REORDER
                    case TokenizerState.CONSUME_CHARACTER_REFERENCE:
                        // Looks at the first character after an '&'. Depending on that character
                        // it either gives up (flushes the buffered text with EmitOrAppendStrBuf
                        // and reprocesses the character in returnState), switches to CONSUME_NCR
                        // for '#', or records the first letter in firstCharKey and jumps to
                        // CHARACTER_REFERENCE_HILO_LOOKUP.
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //------------------------------------
                                //eof
                                goto breakStateloop;
                            }
                            if (c == '\u0000')
                            {
                                goto breakStateloop;
                            }
                            /*
                             * Unlike the definition in the spec, this state does not
                             * return a value and never requires the caller to
                             * backtrack. This state takes care of emitting characters
                             * or appending to the current attribute value. It also
                             * takes care of that in the case when consuming the
                             * character reference fails.
                             */
                            /*
                             * This section defines how to consume a character
                             * reference. This definition is used when parsing character
                             * references in text and in attributes.
                             * 
                             * The behavior depends on the identity of the next
                             * character (the one immediately after the U+0026 AMPERSAND
                             * character):
                             */
                            switch (c)
                            {
                                case ' ':
                                case '\t':
                                case '\n':
                                case '\r': // we'll reconsume!
                                case '\u000C':
                                case '<':
                                case '&':
                                    // Not a character reference: flush what was buffered and
                                    // reprocess this character in the state we came from.
                                    EmitOrAppendStrBuf(returnState);
                                    //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                    // NOTE(review): the commented-out original tests "== 0" while
                                    // the live condition tests "!= 0" - confirm this matches how
                                    // DATA_AND_RCDATA_MASK is defined in this port.
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                    {
                                        reader.StartCollect();
                                    }
                                    //state = Transition(state, returnState, reconsume, pos);
                                    state = returnState;
                                    //reconsume = true;
                                    reader.StepBack();
                                    goto continueStateloop;
                                case '#':
                                    /*
                                     * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
                                     * SIGN.
                                     */
                                    AppendStrBuf('#');
                                    //state = Transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
                                    state = TokenizerState.CONSUME_NCR;
                                    goto continueStateloop;
                                default:
                                    // "additional" is the extra terminating character registered
                                    // by the attribute value states through
                                    // SetAdditionalAndRememberAmpersandLocation.
                                    if (c == additional)
                                    {
                                        EmitOrAppendStrBuf(returnState);
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    // Pack the first letter into 0..51: A-Z -> 0..25,
                                    // a-z -> 26..51. The lookup section uses this as an index.
                                    if (c >= 'a' && c <= 'z')
                                    {
                                        firstCharKey = c - 'a' + 26;
                                    }
                                    else if (c >= 'A' && c <= 'Z')
                                    {
                                        firstCharKey = c - 'A';
                                    }
                                    else
                                    {
                                        // No match
                                        /*
                                         * If no match can be made, then this is a parse
                                         * error.
                                         */
                                        ErrNoNamedCharacterMatch();
                                        EmitOrAppendStrBuf(returnState);
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.StartCollect();
                                        }
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    // Didn't fail yet
                                    AppendStrBuf(c);
                                    //state = Transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
                                    state = TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;

                                    // FALL THROUGH goto continueStateloop;
                                    break;
                            }
                            //------------------------------------
                            // Only the letter branch reaches this point; every other branch
                            // jumps away with goto. Explicit jump emulates the fall-through.
                            goto case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP;
                        }
                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                    case TokenizerState.CHARACTER_REFERENCE_HILO_LOOKUP:
                        // Reads the second character of a named character reference and uses
                        // it, together with firstCharKey (set by CONSUME_CHARACTER_REFERENCE),
                        // to look up the range [lo, hi] of candidate names. On success the
                        // search variables are initialised and control jumps to
                        // CHARACTER_REFERENCE_TAIL; on failure the buffered text is flushed and
                        // the character is reprocessed in returnState.
                        {
                            char c;
                            // Leave the state loop only when NO character could be read. The
                            // check must be negated, exactly as in CONSUME_CHARACTER_REFERENCE;
                            // without the negation every successfully read character aborted
                            // the lookup and no named reference could ever be matched.
                            if (!reader.ReadNext(out c))
                            {
                                //------------------------------------
                                //eof
                                goto breakStateloop;
                            }

                            if (c == '\u0000')
                            {
                                goto breakStateloop;
                            }
                            /*
                             * The data structure is as follows:
                             * 
                             * HILO_ACCEL is a two-dimensional int array whose major
                             * index corresponds to the second character of the
                             * character reference (code point as index) and the
                             * minor index corresponds to the first character of the
                             * character reference (packed so that A-Z runs from 0
                             * to 25 and a-z runs from 26 to 51). This layout makes
                             * it easier to use the sparseness of the data structure
                             * to omit parts of it: The second dimension of the
                             * table is null when no character reference starts with
                             * the character corresponding to that row.
                             * 
                             * The int value HILO_ACCEL (by these indices) is zero
                             * if there exists no character reference starting with
                             * that two-letter prefix. Otherwise, the value is an
                             * int that packs two shorts so that the higher short is
                             * the index of the highest character reference name
                             * with that prefix in NAMES and the lower short
                             * corresponds to the index of the lowest character
                             * reference name with that prefix. (It happens that the
                             * first two character reference names share their
                             * prefix so the packed int cannot be 0 by packing the
                             * two shorts.)
                             * 
                             * NAMES is an array of byte arrays where each byte
                             * array encodes the name of a character reference as
                             * ASCII. The names omit the first two letters of the
                             * name. (Since storing the first two letters would be
                             * redundant with the data contained in HILO_ACCEL.) The
                             * entries are lexically sorted.
                             * 
                             * For a given index in NAMES, the same index in VALUES
                             * contains the corresponding expansion as an array of
                             * two UTF-16 code units (either the character and
                             * U+0000 or a surrogate pair).
                             */
                            int hilo = 0;
                            // Characters above 'z' are never used as an index into the
                            // table, so hilo stays 0 ("no match") for them.
                            if (c <= 'z')
                            {
                                int[] row = NamedCharactersAccel.HILO_ACCEL[c];
                                if (row != null)
                                {
                                    hilo = row[firstCharKey];
                                }
                            }
                            if (hilo == 0)
                            {
                                /*
                                 * If no match can be made, then this is a parse
                                 * error.
                                 */
                                ErrNoNamedCharacterMatch();
                                EmitOrAppendStrBuf(returnState);
                                //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                // NOTE(review): the commented-out original tests "== 0" while
                                // the live condition tests "!= 0" - confirm this matches how
                                // DATA_AND_RCDATA_MASK is defined in this port.
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                {
                                    reader.StartCollect();
                                }
                                //state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
                                //reconsume = true;
                                // Hand the character just read back to returnState.
                                reader.StepBack();
                                goto continueStateloop;
                            }
                            // Didn't fail yet
                            AppendStrBuf(c);
                            // Unpack the candidate range: low 16 bits = lowest index,
                            // high 16 bits = highest index.
                            lo = hilo & 0xFFFF;
                            hi = hilo >> 16;
                            entCol = -1;
                            candidate = -1;
                            strBufMark = 0;
                            //state = Transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
                            state = TokenizerState.CHARACTER_REFERENCE_TAIL;
                            // FALL THROUGH goto continueStateloop;
                            // Explicit jump emulates the fall-through, which C# does not allow.
                            goto case TokenizerState.CHARACTER_REFERENCE_TAIL;
                        }
                    case TokenizerState.CHARACTER_REFERENCE_TAIL:
                        /*outer:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                if (c == '\u0000')
                                {
                                    goto breakStateloop;
                                }
                                entCol++;
                                /*
                                 * Consume the maximum number of characters possible,
                                 * with the consumed characters matching one of the
                                 * identifiers in the first column of the named
                                 * character references table (in a case-sensitive
                                 * manner).
                                 */
                                /*loloop:*/
                                for (; ; )
                                {
                                    if (hi < lo)
                                    {
                                        goto breakOuter;
                                    }
                                    if (entCol == NamedCharacters.NAMES[lo].Length)
                                    {
                                        candidate = lo;
                                        strBufMark = this.strBuffer.Length;
                                        lo++;
                                    }
                                    else if (entCol > NamedCharacters.NAMES[lo].Length)
                                    {
                                        goto breakOuter;
                                    }
                                    else if (c > NamedCharacters.NAMES[lo][entCol])
                                    {
                                        lo++;
                                    }
                                    else
                                    {
                                        goto breakLoloop;
                                    }
                                }
                            breakLoloop:

                                /*hiloop:*/
                                for (; ; )
                                {
                                    if (hi < lo)
                                    {
                                        goto breakOuter;
                                    }
                                    if (entCol == NamedCharacters.NAMES[hi].Length)
                                    {
                                        goto breakHiloop;
                                    }
                                    if (entCol > NamedCharacters.NAMES[hi].Length)
                                    {
                                        goto breakOuter;
                                    }
                                    else if (c < NamedCharacters.NAMES[hi][entCol])
                                    {
                                        hi--;
                                    }
                                    else
                                    {
                                        goto breakHiloop;
                                    }
                                }

                            breakHiloop:

                                if (hi < lo)
                                {
                                    goto breakOuter;
                                }
                                AppendStrBuf(c);
                                continue;
                            }

                        breakOuter:

                            if (candidate == -1)
                            {
                                // reconsume deals with CR, LF or nul
                                /*
                                 * If no match can be made, then this is a parse error.
                                 */
                                ErrNoNamedCharacterMatch();
                                EmitOrAppendStrBuf(returnState);
                                //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                {
                                    reader.StartCollect();
                                }
                                //state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
                                //reconsume = true;
                                reader.StepBack();
                                goto continueStateloop;
                            }
                            else
                            {
                                // c can't be CR, LF or nul if we got here
                                string candidateName = NamedCharacters.NAMES[candidate];
                                if (candidateName.Length == 0
                                        || candidateName[candidateName.Length - 1] != ';')
                                {
                                    /*
                                     * If the last character matched is not a U+003B
                                     * SEMICOLON (;), there is a parse error.
                                     */
                                    //if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
                                    {
                                        /*
                                         * If the entity is being consumed as part of an
                                         * attribute, and the last character matched is
                                         * not a U+003B SEMICOLON (;),
                                         */
                                        char ch;
                                        if (strBufMark == this.strBuffer.Length)
                                        {
                                            ch = c;
                                        }
                                        else
                                        {
                                            // if (strBufOffset != -1) {
                                            // ch = buf[strBufOffset + strBufMark];
                                            // } else {
                                            ch = this.strBuffer[strBufMark];
                                            // }
                                        }
                                        if (ch == '=' || (ch >= '0' && ch <= '9')
                                                || (ch >= 'A' && ch <= 'Z')
                                                || (ch >= 'a' && ch <= 'z'))
                                        {
                                            /*
                                             * and the next character is either a U+003D
                                             * EQUALS SIGN character (=) or in the range
                                             * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
                                             * U+0041 LATIN CAPITAL LETTER A to U+005A
                                             * LATIN CAPITAL LETTER Z, or U+0061 LATIN
                                             * SMALL LETTER A to U+007A LATIN SMALL
                                             * LETTER Z, then, for historical reasons,
                                             * all the characters that were matched
                                             * after the U+0026 AMPERSAND (&) must be
                                             * unconsumed, and nothing is returned.
                                             */
                                            ErrNoNamedCharacterMatch();
                                            AppendStrBufToLongStrBuf();
                                            //state = Transition(state, returnState, reconsume, pos);
                                            state = returnState;
                                            //reconsume = true;
                                            reader.StepBack();
                                            goto continueStateloop;
                                        }
                                    }
                                    //if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
                                    {
                                        ErrUnescapedAmpersandInterpretedAsCharacterReference();
                                    }
                                    else
                                    {
                                        ErrNotSemicolonTerminated();
                                    }
                                }

                                /*
                                 * Otherwise, return a character token for the character
                                 * corresponding to the entity name (as given by the
                                 * second column of the named character references
                                 * table).
                                 */

                                char[] val = NamedCharacters.VALUES[candidate];
                                if (val.Length == 1)
                                {
                                    EmitOrAppendOne(val, returnState);
                                }
                                else
                                {
                                    EmitOrAppendTwo(val, returnState);
                                }
                                // this is so complicated!
                                if (strBufMark < this.strBuffer.Length)
                                {
                                    // if (strBufOffset != -1) {
                                    // if ((returnState & (~1)) != 0) {
                                    // for (int i = strBufMark; i < strBufLen; i++) {
                                    // appendLongStrBuf(buf[strBufOffset + i]);
                                    // }
                                    // } else {
                                    // tokenHandler.Characters(buf, strBufOffset
                                    // + strBufMark, strBufLen
                                    // - strBufMark);
                                    // }
                                    // } else {
                                    //if ((returnState & DATA_AND_RCDATA_MASK) != 0)
                                    if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
                                    {
                                        int j = this.strBuffer.Length;
                                        for (int i = strBufMark; i < j; i++)
                                        {
                                            AppendLongStrBuf(strBuffer[i]);
                                        }
                                    }
                                    else
                                    {
                                        TokenListener.Characters(CopyFromStringBuiler(this.strBuffer, strBufMark, this.strBuffer.Length - strBufMark));
                                    }
                                    // }
                                }
                                //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                {
                                    reader.StartCollect();
                                }
                                //state = Transition(state, returnState, reconsume, pos);
                                state = returnState;
                                //reconsume = true;
                                reader.StepBack();
                                goto continueStateloop;
                                /*
                                 * If the markup contains I'm &notit; I tell you, the
                                 * entity is parsed as "not", as in, I'm ¬it; I tell
                                 * you. But if the markup was I'm &notin; I tell you,
                                 * the entity would be parsed as "notin;", resulting in
                                 * I'm ∉ I tell you.
                                 */
                            }

                        }
                    // XXX reorder point
                    case TokenizerState.s09_CLOSE_TAG_OPEN:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //------------------------------------
                                //eof
                                goto breakStateloop;
                            }

                            /*
                             * Otherwise, if the content model flag is set to the PCDATA
                             * state, or if the next few characters do match that tag
                             * name, consume the next input character:
                             */
                            switch (c)
                            {
                                case '>':
                                    /* U+003E GREATER-THAN SIGN (>) Parse error. */
                                    ErrLtSlashGt();
                                    /*
                                     * Switch to the data state.
                                     */
                                    reader.SkipOneAndStartCollect();
                                    //state = Transition(state, Tokenizer.DATA, reconsume, pos);
                                    state = TokenizerState.s01_DATA;
                                    goto continueStateloop;
                                case '\r':
                                    SilentCarriageReturn();
                                    /* Anything else Parse error. */
                                    ErrGarbageAfterLtSlash();
                                    /*
                                     * Switch to the bogus comment state.
                                     */
                                    ClearLongStrBufAndAppend('\n');
                                    //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.s44_BOGUS_COMMENT;
                                    goto breakStateloop;
                                case '\n':
                                    /* Anything else Parse error. */
                                    ErrGarbageAfterLtSlash();
                                    /*
                                     * Switch to the bogus comment state.
                                     */
                                    ClearLongStrBufAndAppend('\n');
                                    //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                    state = TokenizerState.s44_BOGUS_COMMENT;
                                    goto continueStateloop;
                                case '\u0000':
                                    c = '\uFFFD';
                                    // fall thru
                                    goto default;
                                default:
                                    if (c >= 'A' && c <= 'Z')
                                    {
                                        c += (char)0x20;
                                    }
                                    if (c >= 'a' && c <= 'z')
                                    {
                                        /*
                                         * U+0061 LATIN SMALL LETTER A through to U+007A
                                         * LATIN SMALL LETTER Z Create a new end tag
                                         * token,
                                         */
                                        endTag = true;
                                        /*
                                         * set its tag name to the input character,
                                         */
                                        ClearStrBufAndAppend(c);
                                        /*
                                         * then switch to the tag name state. (Don't
                                         * emit the token yet; further details will be
                                         * filled in before it is emitted.)
                                         */
                                        //state = Transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                        state = TokenizerState.s10_TAG_NAME;
                                        goto continueStateloop;
                                    }
                                    else
                                    {
                                        /* Anything else Parse error. */
                                        ErrGarbageAfterLtSlash();
                                        /*
                                         * Switch to the bogus comment state.
                                         */
                                        ClearLongStrBufAndAppend(c);
                                        //state = Transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                        state = TokenizerState.s44_BOGUS_COMMENT;
                                        goto continueStateloop;
                                    }
                            }
                        }  
                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

        breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            stateSave = state;
            returnStateSave = returnState;
        }

Example #53

Show file

File: IterationData.cs Project: mburgman101/CsQuery

        /// <summary>
        /// Close out this element. Any pending literal text is yielded first; the parent's element
        /// is then yielded only when that parent sits at the top of the hierarchy. Otherwise the
        /// element is just closed and nothing further is yielded.
        /// </summary>
        ///
        /// <param name="factory">
        /// The HTML factory to operate against.
        /// </param>
        ///
        /// <returns>
        /// An enumerable of the DOM objects produced while closing this element, suitable for use with foreach.
        /// </returns>

        public IEnumerable<IDomObject> CloseElement(HtmlElementFactory factory)
        {
            // Text collected before this point, if any, is handed out first.
            IDomObject pendingLiteral;
            bool hasLiteral = TryGetLiteral(factory, out pendingLiteral);
            if (hasLiteral)
            {
                yield return pendingLiteral;
            }

            // Without a parent there is nothing further to yield or reset.
            if (Parent == null)
            {
                yield break;
            }

            // Only a parent that itself has no parent is yielded to the caller.
            bool parentHasNoParent = Parent.Parent == null;
            if (parentHasNoParent)
            {
                yield return Parent.Element;
            }

            Parent.Reset(Pos);
            TokenizerState = TokenizerState.Finished;
        }

Example #54

Show file

File: Tokenizer.cs Project: detlefgrohs/CSasic

        /// <summary>
        /// Splits <paramref name="sourceCode"/> into tokens using a character-driven state machine.
        /// Resets <c>_tokens</c>, <c>_currentToken</c> and <c>_currentTokenizerState</c> before scanning.
        /// </summary>
        /// <param name="sourceCode">The text to tokenize.</param>
        /// <returns>The list of tokens collected during the scan.</returns>
        /// <exception cref="Exception">
        /// The tokenizer is in a state this method has no case for.
        /// </exception>
        public List <Token> Tokenize(string sourceCode)
        {
            _tokens                = new List <Token>();
            _currentToken          = new Token(TokenType.Unknown, string.Empty);
            _currentTokenizerState = TokenizerState.Default;

            foreach (var character in sourceCode)
            {
                // A character that terminates a word or a number does not belong to that
                // token, so it is run through the switch once more in the state left
                // behind by EndToken.
                bool reprocess;
                do
                {
                    reprocess = false;

                    switch (_currentTokenizerState)
                    {
                    case TokenizerState.Default:
                        if (SingleCharTokens.ContainsKey(character))
                        {
                            _tokens.Add(new Token(SingleCharTokens[character], character.ToString()));
                        }
                        else if (char.IsLetter(character))
                        {
                            StartToken(TokenizerState.Word, character.ToString());
                        }
                        else if (char.IsDigit(character))
                        {
                            StartToken(TokenizerState.Number, character.ToString());
                        }
                        else if (character.Equals('"'))
                        {
                            StartToken(TokenizerState.String);
                        }
                        else if (character.Equals('\''))
                        {
                            StartToken(TokenizerState.Comment);
                        }
                        break;

                    case TokenizerState.Word:
                        if (char.IsLetterOrDigit(character))
                        {
                            _currentToken.Text += character;
                        }
                        else if (character.Equals(':'))
                        {
                            // The colon is consumed: a word followed by ':' is a label.
                            EndToken(TokenType.Label);
                        }
                        else
                        {
                            EndToken(TokenType.Word);
                            reprocess = true;
                        }
                        break;

                    case TokenizerState.Number:     // ToDo: support negative numbers and floating point numbers.
                        if (char.IsDigit(character))
                        {
                            _currentToken.Text += character;
                        }
                        else
                        {
                            EndToken(TokenType.Number);
                            reprocess = true;
                        }
                        break;

                    case TokenizerState.String:
                        if (character.Equals('"'))
                        {
                            EndToken(TokenType.String);
                        }
                        else
                        {
                            _currentToken.Text += character;
                        }
                        break;

                    case TokenizerState.Comment:
                        // Comment text is skipped; only the line break ends the comment.
                        if (character.Equals('\n'))
                        {
                            EndToken(TokenType.Comment);
                        }
                        break;

                    default:
                        throw new Exception("Unknown tokenizer state reached.");
                    }
                }
                while (reprocess);
            }

            // End of input also terminates a word, number or comment that is still open;
            // without this the last token is lost when the source has no trailing delimiter.
            // NOTE(review): presumes EndToken finalizes _currentToken the same way it does
            // inside the loop - confirm. An unterminated string is still not emitted.
            switch (_currentTokenizerState)
            {
            case TokenizerState.Word:
                EndToken(TokenType.Word);
                break;

            case TokenizerState.Number:
                EndToken(TokenType.Number);
                break;

            case TokenizerState.Comment:
                EndToken(TokenType.Comment);
                break;
            }

            return(_tokens);
        }

Example #55

Show file

        /// <summary>
        /// Consumes <c>_remainingText</c> from the front and appends the resulting tokens to
        /// <c>_tokens</c>, alternating between the raw-text state and the instruction state.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// An instruction was expected but none of the instruction patterns matched.
        /// </exception>
        public void Tokenize()
        {
            while (_remainingText != string.Empty)
            {
                if (_state == TokenizerState.ReadingText)
                {
                    var textUpToInstruction = _rawTextRegex.Match(_remainingText);
                    if (!textUpToInstruction.Success)
                    {
                        // The raw-text pattern did not match: everything left is raw text.
                        _tokens.Add(new Token(TokenType.RawText, _remainingText));
                        _remainingText = string.Empty;
                        return;
                    }

                    string leadingText = textUpToInstruction.Groups["text"].Value;
                    if (leadingText.Length > 0)
                    {
                        _tokens.Add(new Token(TokenType.RawText, leadingText));
                    }

                    _tokens.Add(new Token(TokenType.InstructionBegin, "<$"));
                    _remainingText = _remainingText.Substring(textUpToInstruction.Length);
                    _state         = TokenizerState.ReadingInstruction;
                }
                else if (_state == TokenizerState.ReadingInstruction)
                {
                    // The patterns are tried in a fixed order; the first one that matches wins.
                    var variable = _varInstructionRegex.Match(_remainingText);
                    if (variable.Success)
                    {
                        _tokens.Add(new Token(TokenType.Variable, "var:"));
                        _tokens.Add(new Token(TokenType.Identifier, variable.Groups["name"].Value));
                        _tokens.Add(new Token(TokenType.InstructionEnd, ">"));

                        _remainingText = _remainingText.Substring(variable.Length);
                        _state         = TokenizerState.ReadingText;
                        continue;
                    }

                    var subtemplate = _subtemplateInstructionRegex.Match(_remainingText);
                    if (subtemplate.Success)
                    {
                        _tokens.Add(new Token(TokenType.SubtemplateBegin, "subtemplate:"));
                        _tokens.Add(new Token(TokenType.Identifier, subtemplate.Groups["name"].Value));
                        _tokens.Add(new Token(TokenType.InstructionEnd, ">"));

                        _remainingText = _remainingText.Substring(subtemplate.Length);
                        _state         = TokenizerState.ReadingText;
                        continue;
                    }

                    var endSubtemplate = _endSubtemplateInstructionRegex.Match(_remainingText);
                    if (endSubtemplate.Success)
                    {
                        _tokens.Add(new Token(TokenType.SubtemplateEnd, "endsubtemplate"));
                        _tokens.Add(new Token(TokenType.InstructionEnd, ">"));

                        _remainingText = _remainingText.Substring(endSubtemplate.Length);
                        _state         = TokenizerState.ReadingText;
                        continue;
                    }

                    // Nothing recognised: report the start of the offending text.
                    throw new InvalidOperationException("Unable to understand the remaining text while parsing it, because some '<$...>' instruction was expected. " +
                                                        $"Remaining text began with this instead: '{string.Concat(_remainingText.Take(20))}'.");
                }
            }
        }

Example #56

Show file

File: TokenizerStateChange.cs Project: tathamoddie/RegexAnalyzer

 /// <summary>
 /// Builds an action that pushes <paramref name="state"/> onto the stack it is given.
 /// </summary>
 /// <param name="state">The state the returned action will push.</param>
 /// <returns>An action that takes a state stack and pushes <paramref name="state"/> onto it.</returns>
 internal static Action<Stack<TokenizerState>> PushState(TokenizerState state)
 {
     Action<Stack<TokenizerState>> push = stack => stack.Push(state);
     return push;
 }

Example #57

Show file

File: MsSql2005Dialect.cs Project: stuartcarnie/nhcontrib

            /// <summary>
            /// Lazily walks <c>original</c> one character at a time and yields the tokens it
            /// recognises. A comma met outside quotes and parentheses is always yielded as its
            /// own token; quoted text and bracketed groups are kept together.
            /// </summary>
            /// <exception cref="InvalidExpressionException">
            /// The scanner state is not one of the handled values.
            /// </exception>
            IEnumerator <string> IEnumerable <string> .GetEnumerator()
            {
                StringBuilder  token         = new StringBuilder();
                TokenizerState mode          = TokenizerState.WhiteSpace;
                int            depth         = 0;
                bool           nextIsEscaped = false;

                for (int index = 0; index < original.Length; index++)
                {
                    char current    = original[index];
                    bool opensGroup = current == '(' || current == '[';

                    switch (mode)
                    {
                    case TokenizerState.WhiteSpace:
                        if (current == ',')
                        {
                            yield return ",";
                        }
                        else if (current == '\'')
                        {
                            mode = TokenizerState.Quoted;
                            token.Append(current);
                        }
                        else if (opensGroup)
                        {
                            mode  = TokenizerState.InParenthesis;
                            depth = 1;
                            token.Append(current);
                        }
                        else if (!char.IsWhiteSpace(current))
                        {
                            mode = TokenizerState.Token;
                            token.Append(current);
                        }
                        break;

                    case TokenizerState.Quoted:
                        // Every character seen while quoted becomes part of the token.
                        token.Append(current);

                        if (nextIsEscaped)
                        {
                            nextIsEscaped = false;
                        }
                        // handle escaping of ' by using '' or \'
                        else if (current == '\\' || (current == '\'' && index + 1 < original.Length && original[index + 1] == '\''))
                        {
                            nextIsEscaped = true;
                        }
                        else if (current == '\'')
                        {
                            // Closing quote: the token is complete.
                            yield return token.ToString();

                            mode         = TokenizerState.WhiteSpace;
                            token.Length = 0;
                        }
                        break;

                    case TokenizerState.InParenthesis:
                        // Every character inside the group is kept; only the nesting depth varies.
                        token.Append(current);

                        if (current == ')' || current == ']')
                        {
                            depth -= 1;
                            if (depth == 0)
                            {
                                yield return token.ToString();

                                token.Length = 0;
                                mode         = TokenizerState.WhiteSpace;
                            }
                        }
                        else if (opensGroup)
                        {
                            depth += 1;
                        }
                        break;

                    case TokenizerState.Token:
                        if (current == ',')
                        {
                            // stop current token, and send the , as well
                            yield return token.ToString();

                            token.Length = 0;
                            yield return ",";

                            mode = TokenizerState.WhiteSpace;
                        }
                        else if (char.IsWhiteSpace(current))
                        {
                            yield return token.ToString();

                            token.Length = 0;
                            mode         = TokenizerState.WhiteSpace;
                        }
                        else
                        {
                            // A bracket or quote switches mode but continues the same token.
                            if (opensGroup)
                            {
                                mode  = TokenizerState.InParenthesis;
                                depth = 1;
                            }
                            else if (current == '\'')
                            {
                                mode = TokenizerState.Quoted;
                            }
                            token.Append(current);
                        }
                        break;

                    default:
                        throw new InvalidExpressionException("Could not understand the string " + original);
                    }
                }

                // Whatever is still buffered when the input ends is the final token.
                if (token.Length > 0)
                {
                    yield return token.ToString();
                }
            }

Example #58

Show file

File: TokenizerStateChange.cs Project: tathamoddie/RegexAnalyzer

 /// <summary>
 /// Convenience overload: forwards <paramref name="state"/> to the two-argument
 /// <c>ReplaceState</c>, passing 1 as the second argument.
 /// </summary>
 /// <param name="state">The state handed on to the two-argument overload.</param>
 /// <returns>The action produced by the two-argument overload.</returns>
 internal static Action<Stack<TokenizerState>> ReplaceState(TokenizerState state)
 {
     Action<Stack<TokenizerState>> replacement = ReplaceState(state, 1);
     return replacement;
 }

Example #59

Show file

        /// <summary>
        /// Advances the state machine, reading from <c>reader</c> as needed, until the next
        /// token is complete and returns it.
        /// </summary>
        /// <returns>
        /// A string token for accumulated text, a terminal token for a character accepted by
        /// <c>IsTerm</c>, or <c>CsvToken.End</c> once the end of the document is reached.
        /// </returns>
        /// <exception cref="ObjectDisposedException">The tokenizer has been disposed.</exception>
        /// <exception cref="Exception">The state machine is in a state this method does not handle.</exception>
        public CsvToken GetToken()
        {
            if (disposed)
            {
                throw new ObjectDisposedException("tokenizer");
            }

            // Text gathered during this call only; it does not survive between calls.
            StringBuilder buffer = null;

            for (;;)
            {
                switch (state)
                {
                case TokenizerState.EndOfDocument:
                    return CsvToken.End;

                case TokenizerState.Unknown:
                    // First read: decide between an empty document and normal reading.
                    input = reader.Read();

                    if (input == EndOfStream)
                    {
                        state = TokenizerState.EndOfDocument;
                    }
                    else
                    {
                        TextPosition = TextPosition.Begin();
                        state        = TokenizerState.Reading;
                    }
                    break;

                case TokenizerState.FlushLastToken:
                    if (buffer == null || buffer.Length == 0)
                    {
                        state = TokenizerState.Failed;
                        break;
                    }

                    state = TokenizerState.EndOfDocument;
                    return CsvToken.String(buffer.ToString());

                case TokenizerState.Reading:
                    if (input == EndOfStream)
                    {
                        state = buffer == null ? TokenizerState.EndOfDocument : TokenizerState.FlushLastToken;
                        break;
                    }

                    if (buffer == null)
                    {
                        buffer = new StringBuilder();
                    }

                    char current = (char)input;

                    if (!IsTerm(current))
                    {
                        // Ordinary character: keep it and move on to the next one.
                        buffer.Append(current);
                        input = reader.Read();
                        break;
                    }

                    if (buffer.Length > 0)
                    {
                        // Return the gathered text first; the terminator stays in
                        // 'input' and is handled by the next call.
                        return CsvToken.String(buffer.ToString());
                    }

                    input = reader.Read();
                    return CsvToken.Terminal(current);

                default:
                    throw new Exception();
                }
            }
        }

Example #60

Show file

File: UrlEncodedDictionarySerializer.cs Project: Fricsay/azure-iot-sdks

            /// <summary>
            /// Lazily scans <c>value</c> starting at the current <c>position</c> and yields
            /// alternating key and value tokens. The scan is driven by <c>currentState</c>:
            /// <c>'='</c> ends a key, <c>'&amp;'</c> ends a pair and <c>'/'</c> ends the scan.
            /// A key that is not followed by <c>'='</c> is reported together with a value
            /// token created with a length of zero.
            /// </summary>
            /// <returns>
            /// A deferred sequence of tokens; nothing is produced when <c>position</c> is
            /// already at or past the end of <c>value</c>.
            /// </returns>
            /// <exception cref="FormatException">
            /// Raised during enumeration once the scanner has entered the error state
            /// (a separator where a key must start, a second <c>'='</c> inside a value, or
            /// the input ending where a key was expected).
            /// </exception>
            /// <exception cref="NotSupportedException">
            /// Raised during enumeration when <c>currentState</c> holds a state this method
            /// does not handle.
            /// </exception>
            public IEnumerable<Token> GetTokens()
            {
                if (this.position >= this.value.Length)
                {
                    yield break;
                }

                // Number of characters collected for the token currently being read.
                int pendingLength = 0;
                string failure = null;
                bool finished = false;

                do
                {
                    // Evaluated once per step; every use below happens before the first
                    // yield of the step, so it always reflects the position being examined.
                    bool atEnd = this.position >= this.value.Length;

                    switch (this.currentState)
                    {
                        case TokenizerState.ReadyToReadKey:
                            if (atEnd)
                            {
                                failure = "Unexpected string end in '{0}' state.".FormatInvariant(this.currentState);
                                this.currentState = TokenizerState.Error;
                            }
                            else
                            {
                                char symbol = this.value[this.position];
                                if (symbol == '=' || symbol == '&')
                                {
                                    // A key may not begin with a separator.
                                    failure = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(symbol, this.currentState);
                                    this.currentState = TokenizerState.Error;
                                }
                                else if (symbol == '/')
                                {
                                    this.currentState = TokenizerState.Finish;
                                }
                                else
                                {
                                    // First character of a new key.
                                    pendingLength++;
                                    this.currentState = TokenizerState.ReadKey;
                                }
                            }

                            break;

                        case TokenizerState.ReadKey:
                            if (atEnd)
                            {
                                // Input ended inside a key: report it with a zero-length value.
                                yield return this.CreateToken(TokenType.Key, pendingLength);
                                yield return this.CreateToken(TokenType.Value, 0);
                                pendingLength = 0;
                                this.currentState = TokenizerState.Finish;
                            }
                            else
                            {
                                char symbol = this.value[this.position];
                                if (symbol == '=')
                                {
                                    yield return this.CreateToken(TokenType.Key, pendingLength);
                                    pendingLength = 0;
                                    this.currentState = TokenizerState.ReadValue;
                                }
                                else if (symbol == '&' || symbol == '/')
                                {
                                    // Key without '=': report it with a zero-length value, then
                                    // either start the next pair ('&') or stop ('/').
                                    yield return this.CreateToken(TokenType.Key, pendingLength);
                                    yield return this.CreateToken(TokenType.Value, 0);
                                    pendingLength = 0;
                                    this.currentState = symbol == '&'
                                        ? TokenizerState.ReadyToReadKey
                                        : TokenizerState.Finish;
                                }
                                else
                                {
                                    pendingLength++;
                                }
                            }

                            break;

                        case TokenizerState.ReadValue:
                            if (atEnd)
                            {
                                yield return this.CreateToken(TokenType.Value, pendingLength);
                                pendingLength = 0;
                                this.currentState = TokenizerState.Finish;
                            }
                            else
                            {
                                char symbol = this.value[this.position];
                                if (symbol == '=')
                                {
                                    // A second '=' inside a value is rejected.
                                    failure = "Unexpected character '{0}' in '{1}' state.".FormatInvariant(symbol, this.currentState);
                                    this.currentState = TokenizerState.Error;
                                }
                                else if (symbol == '&' || symbol == '/')
                                {
                                    yield return this.CreateToken(TokenType.Value, pendingLength);
                                    pendingLength = 0;
                                    this.currentState = symbol == '&'
                                        ? TokenizerState.ReadyToReadKey
                                        : TokenizerState.Finish;
                                }
                                else
                                {
                                    pendingLength++;
                                }
                            }

                            break;

                        case TokenizerState.Finish:
                        case TokenizerState.Error:
                            finished = true;
                            break;

                        default:
                            throw new NotSupportedException();
                    }

                    // The position advances after every step, including the terminal one.
                    this.position++;
                }
                while (!finished);

                if (this.currentState == TokenizerState.Error)
                {
                    throw new FormatException(failure);
                }
            }