/// <summary>
/// Overwrites this tokenizer's state with the state held by <paramref name="other"/>.
/// </summary>
/// <remarks>
/// The used portion of both character buffers is copied into this instance's own
/// buffers (which are reallocated only when too small). The tag name, attribute name
/// and attribute holder are cloned through their respective <c>Clone*</c> methods;
/// every other member is assigned directly. <c>shouldSuspend</c> is always reset to
/// <c>false</c>. Line-number state is not transferred (XXX line numbers).
/// </remarks>
/// <param name="other">The tokenizer whose state is loaded into this one.</param>
public void LoadState(Tokenizer other)
{
    // Short string buffer: adopt the length, grow if required, copy the used prefix.
    strBufLen = other.strBufLen;
    if (strBuf.Length < strBufLen)
    {
        strBuf = new char[strBufLen];
    }
    Array.Copy(other.strBuf, strBuf, strBufLen);

    // Long string buffer: same treatment.
    longStrBufLen = other.longStrBufLen;
    if (longStrBuf.Length < longStrBufLen)
    {
        longStrBuf = new char[longStrBufLen];
    }
    Array.Copy(other.longStrBuf, longStrBuf, longStrBufLen);

    // Saved states and end-tag expectation.
    stateSave = other.stateSave;
    returnStateSave = other.returnStateSave;
    endTagExpectation = other.endTagExpectation;
    endTagExpectationAsArray = other.endTagExpectationAsArray;

    // Scalar bookkeeping, assigned one-to-one.
    lastCR = other.lastCR;
    index = other.index;
    forceQuirks = other.forceQuirks;
    additional = other.additional;
    entCol = other.entCol;
    firstCharKey = other.firstCharKey;
    lo = other.lo;
    hi = other.hi;
    candidate = other.candidate;
    strBufMark = other.strBufMark;
    prevValue = other.prevValue;
    value = other.value;
    seenDigits = other.seenDigits;
    endTag = other.endTag;

    // Never carried over from the other tokenizer.
    shouldSuspend = false;

    // Doctype parts are taken over as-is; a null source simply yields null here.
    doctypeName = other.doctypeName;
    systemIdentifier = other.systemIdentifier;
    publicIdentifier = other.publicIdentifier;

    // These members are cloned when present, otherwise cleared.
    tagName = other.tagName == null
        ? null
        : other.tagName.CloneElementName();
    attributeName = other.attributeName == null
        ? null
        : other.attributeName.CloneAttributeName();
    attributes = other.attributes == null
        ? null
        : other.attributes.CloneAttributes();
}
/// <summary>
/// Finishes tokenization: drops the buffers and the pending token parts, notifies
/// the token handler, and finally clears and releases the attribute holder if one
/// is still present.
/// </summary>
public void End()
{
    // Release both character buffers.
    strBuf = null;
    longStrBuf = null;

    // Forget any partially built doctype / tag data.
    doctypeName = null;
    systemIdentifier = null;
    publicIdentifier = null;
    tagName = null;
    attributeName = null;

    // The handler is notified before the attribute holder is touched.
    TokenHandler.EndTokenization();

    if (attributes == null)
    {
        return;
    }

    attributes.Clear(mappingLangToXmlLang);
    attributes = null;
}
/// <summary>
/// Resolves the first <c>strBufLen</c> characters of <c>strBuf</c> to an element
/// name via <c>ElementName.ElementNameByBuffer</c> and stores it in <c>tagName</c>.
/// </summary>
private void StrBufToElementNameString()
{
    var resolvedName = ElementName.ElementNameByBuffer(strBuf, 0, strBufLen);
    tagName = resolvedName;
}
private int StateLoop(int state, char c, int pos, char[] buf, bool reconsume, int returnState, int endPos) { /* * Idioms used in this code: * * * Consuming the next input character * * To consume the next input character, the code does this: if (++pos == * endPos) { goto breakStateloop; } c = checkChar(buf, pos); * * * Staying in a state * * When there's a state that the tokenizer may stay in over multiple * input characters, the state has a wrapper |for(;;)| loop and staying * in the state continues the loop. * * * Switching to another state * * To switch to another state, the code sets the state variable to the * magic number of the new state. Then it either continues stateloop or * breaks out of the state's own wrapper loop if the target state is * right after the current state in source order. (This is a partial * workaround for Java's lack of goto.) * * * Reconsume support * * The spec sometimes says that an input character is reconsumed in * another state. If a state can ever be entered so that an input * character can be reconsumed in it, the state's code starts with an * |if (reconsume)| that sets reconsume to false and skips over the * normal code for consuming a new character. * * To reconsume the current character in another state, the code sets * |reconsume| to true and then switches to the other state. * * * Emitting character tokens * * This method emits character tokens lazily. Whenever a new range of * character tokens starts, the field cstart must be set to the start * index of the range. The flushChars() method must be called at the end * of a range to flush it. * * * U+0000 handling * * The various states have to handle the replacement of U+0000 with * U+FFFD. However, if U+0000 would be reconsumed in another state, the * replacement doesn't need to happen, because it's handled by the * reconsuming state. 
* * * LF handling * * Every state needs to increment the line number upon LF unless the LF * gets reconsumed by another state which increments the line number. * * * CR handling * * Every state needs to handle CR unless the CR gets reconsumed and is * handled by the reconsuming state. The CR needs to be handled as if it * were and LF, the lastCR field must be set to true and then this * method must return. The IO driver will then swallow the next * character if it is an LF to coalesce CRLF. */ /* * As there is no support for labeled loops in C#, instead of break <loop>; * the port uses goto break<loop>; and a label after the loop. * Instead of continue <loop>; it uses goto continue<loop>; and a label * at the beginning or end of the loop (which doesn't matter in for(;;) loops) */ /*stateloop:*/ for (; ; ) { continueStateloop: switch (state) { case DATA: /*dataloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } switch (c) { case '&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in data state. */ FlushChars(buf, pos); ClearStrBufAndAppend(c); SetAdditionalAndRememberAmpersandLocation('\u0000'); returnState = state; goto continueStateloop; case '<': /* * U+003C LESS-THAN SIGN (<) Switch to the tag * open state. */ FlushChars(buf, pos); goto breakDataloop; // FALL THROUGH continue // stateloop; case '\u0000': EmitReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the input character as a * character token. * * Stay in the data state. */ continue; } } breakDataloop: goto case TAG_OPEN; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case TAG_OPEN: /*tagopenloop:*/ for (; ; ) { /* * The behavior of this state depends on the content * model flag. 
*/ if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * If the content model flag is set to the PCDATA state * Consume the next input character: */ if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to U+005A * LATIN CAPITAL LETTER Z Create a new start tag * token, */ endTag = false; /* * set its tag name to the lowercase version of the * input character (add 0x0020 to the character's * code point), */ ClearStrBufAndAppend((char)(c + 0x20)); /* then switch to the tag name state. */ /* * (Don't emit the token yet; further details will * be filled in before it is emitted.) */ goto breakTagopenloop; // goto continueStateloop; } else if (c >= 'a' && c <= 'z') { /* * U+0061 LATIN SMALL LETTER A through to U+007A * LATIN SMALL LETTER Z Create a new start tag * token, */ endTag = false; /* * set its tag name to the input character, */ ClearStrBufAndAppend(c); /* then switch to the tag name state. */ /* * (Don't emit the token yet; further details will * be filled in before it is emitted.) */ goto breakTagopenloop; // goto continueStateloop; } switch (c) { case '!': /* * U+0021 EXCLAMATION MARK (!) Switch to the * markup declaration open state. */ goto continueStateloop; case '/': /* * U+002F SOLIDUS (/) Switch to the close tag * open state. */ goto continueStateloop; case '?': /* * U+003F QUESTION MARK () Parse error. */ ErrProcessingInstruction(); /* * Switch to the bogus comment state. */ ClearLongStrBufAndAppend(c); goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Parse error. */ ErrLtGt(); /* * Emit a U+003C LESS-THAN SIGN character token * and a U+003E GREATER-THAN SIGN character * token. */ TokenHandler.Characters(Tokenizer.LT_GT, 0, 2); /* Switch to the data state. */ cstart = pos + 1; goto continueStateloop; default: /* * Anything else Parse error. 
*/ ErrBadCharAfterLt(c); /* * Emit a U+003C LESS-THAN SIGN character token */ TokenHandler.Characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in * the data state. */ cstart = pos; reconsume = true; goto continueStateloop; } } breakTagopenloop: goto case TAG_NAME; // FALL THROUGH DON'T REORDER case TAG_NAME: /*tagnameloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); StrBufToElementNameString(); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before attribute name state. */ StrBufToElementNameString(); goto breakTagnameloop; // goto continueStateloop; case '/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state. */ StrBufToElementNameString(); goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token. */ StrBufToElementNameString(); if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; goto default; // fall thru default: if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Append the * lowercase version of the current input * character (add 0x0020 to the character's * code point) to the current tag token's * tag name. */ c += (char)0x20; } /* * Anything else Append the current input * character to the current tag token's tag * name. */ AppendStrBuf(c); /* * Stay in the tag name state. 
*/ continue; } } breakTagnameloop: goto case BEFORE_ATTRIBUTE_NAME; // FALLTHRU DON'T REORDER case BEFORE_ATTRIBUTE_NAME: /*beforeattributenameloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before attribute name state. */ continue; case '/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state. */ goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token. */ if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto case '\"'; case '\"': case '\'': case '<': case '=': /* * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS * SIGN (=) Parse error. */ ErrBadCharBeforeAttributeNameOrNull(c); /* * Treat it as per the "anything else" entry * below. */ goto default; default: /* * Anything else Start a new attribute in the * current tag token. */ if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Set that * attribute's name to the lowercase version * of the current input character (add * 0x0020 to the character's code point) */ c += (char)0x20; } /* * Set that attribute's name to the current * input character, */ ClearStrBufAndAppend(c); /* * and its value to the empty string. */ // Will do later. /* * Switch to the attribute name state. 
*/ goto breakBeforeattributenameloop; // goto continueStateloop; } } breakBeforeattributenameloop: goto case ATTRIBUTE_NAME; // FALLTHRU DON'T REORDER case ATTRIBUTE_NAME: /*attributenameloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); AttributeNameComplete(); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; // fall thru case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the after attribute name state. */ AttributeNameComplete(); goto continueStateloop; case '/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state. */ AttributeNameComplete(); AddAttributeWithoutValue(); goto continueStateloop; case '=': /* * U+003D EQUALS SIGN (=) Switch to the before * attribute value state. */ AttributeNameComplete(); goto breakAttributenameloop; // goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token. */ AttributeNameComplete(); AddAttributeWithoutValue(); if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto case '\"'; case '\"': case '\'': case '<': /* * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE * (') U+003C LESS-THAN SIGN (<) Parse error. */ ErrQuoteOrLtInAttributeNameOrNull(c); /* * Treat it as per the "anything else" entry * below. */ goto default; default: if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Append the * lowercase version of the current input * character (add 0x0020 to the character's * code point) to the current attribute's * name. */ c += (char)0x20; } /* * Anything else Append the current input * character to the current attribute's name. */ AppendStrBuf(c); /* * Stay in the attribute name state. 
*/ continue; } } breakAttributenameloop: goto case BEFORE_ATTRIBUTE_VALUE; // FALLTHRU DON'T REORDER case BEFORE_ATTRIBUTE_VALUE: /*beforeattributevalueloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; // fall thru case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before attribute value state. */ continue; case '"': /* * U+0022 QUOTATION MARK (") Switch to the * attribute value (double-quoted) state. */ ClearLongStrBuf(); goto breakBeforeattributevalueloop; // goto continueStateloop; case '&': /* * U+0026 AMPERSAND (&) Switch to the attribute * value (unquoted) state and reconsume this * input character. */ ClearLongStrBuf(); NoteUnquotedAttributeValue(); reconsume = true; goto continueStateloop; case '\'': /* * U+0027 APOSTROPHE (') Switch to the attribute * value (single-quoted) state. */ ClearLongStrBuf(); goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Parse error. */ ErrAttributeValueMissing(); /* * Emit the current tag token. */ AddAttributeWithoutValue(); if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto case '<'; case '<': case '=': case '`': /* * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN * (=) U+0060 GRAVE ACCENT (`) */ ErrLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); /* * Treat it as per the "anything else" entry * below. */ goto default; default: /* * Anything else Append the current input * character to the current attribute's value. */ ClearLongStrBufAndAppend(c); /* * Switch to the attribute value (unquoted) * state. 
*/ NoteUnquotedAttributeValue(); goto continueStateloop; } } breakBeforeattributevalueloop: goto case ATTRIBUTE_VALUE_DOUBLE_QUOTED; // FALLTHRU DON'T REORDER case ATTRIBUTE_VALUE_DOUBLE_QUOTED: /*attributevaluedoublequotedloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '"': /* * U+0022 QUOTATION MARK (") Switch to the after * attribute value (quoted) state. */ AddAttributeWithValue(); goto breakAttributevaluedoublequotedloop; // goto continueStateloop; case '&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in attribute value state, with the * additional allowed character being U+0022 * QUOTATION MARK ("). */ ClearStrBufAndAppend(c); SetAdditionalAndRememberAmpersandLocation('\"'); returnState = state; goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append the current input * character to the current attribute's value. */ AppendLongStrBuf(c); /* * Stay in the attribute value (double-quoted) * state. */ continue; } } breakAttributevaluedoublequotedloop: goto case AFTER_ATTRIBUTE_VALUE_QUOTED; // FALLTHRU DON'T REORDER case AFTER_ATTRIBUTE_VALUE_QUOTED: /*afterattributevaluequotedloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before attribute name state. */ goto continueStateloop; case '/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state. 
*/ goto breakAfterattributevaluequotedloop; // goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token. */ if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; default: /* * Anything else Parse error. */ ErrNoSpaceBetweenAttributes(); /* * Reconsume the character in the before * attribute name state. */ reconsume = true; goto continueStateloop; } } breakAfterattributevaluequotedloop: goto case SELF_CLOSING_START_TAG; // FALLTHRU DON'T REORDER case SELF_CLOSING_START_TAG: if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '>': /* * U+003E GREATER-THAN SIGN (>) Set the self-closing * flag of the current tag token. Emit the current * tag token. */ if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; default: /* Anything else Parse error. */ ErrSlashNotFollowedByGt(); /* * Reconsume the character in the before attribute * name state. */ reconsume = true; goto continueStateloop; } // XXX reorder point case ATTRIBUTE_VALUE_UNQUOTED: for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); AddAttributeWithValue(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before attribute name state. 
*/ AddAttributeWithValue(); goto continueStateloop; case '&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in attribute value state, with the * additional allowed character being U+003E * GREATER-THAN SIGN (>) */ ClearStrBufAndAppend(c); SetAdditionalAndRememberAmpersandLocation('>'); returnState = state; goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token. */ AddAttributeWithValue(); if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; goto case '<'; // fall thru case '<': case '\"': case '\'': case '=': case '`': /* * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error. */ ErrUnquotedAttributeValOrNull(c); /* * Treat it as per the "anything else" entry * below. */ // fall through goto default; default: // [NOCPP] ErrHtml4NonNameInUnquotedAttribute(c); // ]NOCPP] /* * Anything else Append the current input * character to the current attribute's value. */ AppendLongStrBuf(c); /* * Stay in the attribute value (unquoted) state. */ continue; } } // XXX reorder point case AFTER_ATTRIBUTE_NAME: for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the after attribute name state. */ continue; case '/': /* * U+002F SOLIDUS (/) Switch to the self-closing * start tag state. */ AddAttributeWithoutValue(); goto continueStateloop; case '=': /* * U+003D EQUALS SIGN (=) Switch to the before * attribute value state. */ goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * tag token. 
*/ AddAttributeWithoutValue(); if (shouldSuspend) { goto breakStateloop; } /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; goto case '\"'; // fall thru case '\"': case '\'': case '<': ErrQuoteOrLtInAttributeNameOrNull(c); /* * Treat it as per the "anything else" entry * below. */ goto default; default: AddAttributeWithoutValue(); /* * Anything else Start a new attribute in the * current tag token. */ if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Set that * attribute's name to the lowercase version * of the current input character (add * 0x0020 to the character's code point) */ c += (char)0x20; } /* * Set that attribute's name to the current * input character, */ ClearStrBufAndAppend(c); /* * and its value to the empty string. */ // Will do later. /* * Switch to the attribute name state. */ goto continueStateloop; } } // XXX reorder point case MARKUP_DECLARATION_OPEN: /*markupdeclarationopenloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * If the next two characters are both U+002D * HYPHEN-MINUS characters (-), consume those two * characters, create a comment token whose data is the * empty string, and switch to the comment start state. * * Otherwise, if the next seven characters are an ASCII * case-insensitive match for the word "DOCTYPE", then * consume those characters and switch to the DOCTYPE * state. * * Otherwise, if the insertion mode is * "in foreign content" and the current node is not an * element in the HTML namespace and the next seven * characters are an case-sensitive match for the string * "[CDATA[" (the five uppercase letters "CDATA" with a * U+005B LEFT SQUARE BRACKET character before and * after), then consume those characters and switch to * the CDATA section state. * * Otherwise, is is a parse error. Switch to the bogus * comment state. 
The next character that is consumed, * if any, is the first character that will be in the * comment. */ switch (c) { case '-': ClearLongStrBufAndAppend(c); goto breakMarkupdeclarationopenloop; // goto continueStateloop; case 'd': case 'D': ClearLongStrBufAndAppend(c); index = 0; goto continueStateloop; case '[': if (TokenHandler.IsCDataSectionAllowed) { ClearLongStrBufAndAppend(c); index = 0; goto continueStateloop; } else { // else fall through goto default; } default: ErrBogusComment(); ClearLongStrBuf(); reconsume = true; goto continueStateloop; } } breakMarkupdeclarationopenloop: goto case MARKUP_DECLARATION_HYPHEN; // FALLTHRU DON'T REORDER case MARKUP_DECLARATION_HYPHEN: /*markupdeclarationhyphenloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); switch (c) { case '\u0000': goto breakStateloop; case '-': ClearLongStrBuf(); goto breakMarkupdeclarationhyphenloop; // goto continueStateloop; default: ErrBogusComment(); reconsume = true; goto continueStateloop; } } breakMarkupdeclarationhyphenloop: goto case COMMENT_START; // FALLTHRU DON'T REORDER case COMMENT_START: /*commentstartloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Comment start state * * * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment * start dash state. */ AppendLongStrBuf(c); goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Parse error. */ ErrPrematureEndOfComment(); /* Emit the comment token. */ EmitComment(0, pos); /* * Switch to the data state. */ goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); goto breakCommentstartloop; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append the input character to * the comment token's data. */ AppendLongStrBuf(c); /* * Switch to the comment state. 
*/ goto breakCommentstartloop; // goto continueStateloop; } } breakCommentstartloop: goto case COMMENT; // FALLTHRU DON'T REORDER case COMMENT: /*commentloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Comment state Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment * end dash state */ AppendLongStrBuf(c); goto breakCommentloop; // goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append the input character to * the comment token's data. */ AppendLongStrBuf(c); /* * Stay in the comment state. */ continue; } } breakCommentloop: goto case COMMENT_END_DASH; // FALLTHRU DON'T REORDER case COMMENT_END_DASH: /*commentenddashloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Comment end dash state Consume the next input * character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment * end state */ AppendLongStrBuf(c); goto breakCommentenddashloop; // goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); goto continueStateloop; case '\u0000': c = '\uFFFD'; goto default; // fall thru default: /* * Anything else Append a U+002D HYPHEN-MINUS * (-) character and the input character to the * comment token's data. */ AppendLongStrBuf(c); /* * Switch to the comment state. */ goto continueStateloop; } } breakCommentenddashloop: goto case COMMENT_END; // FALLTHRU DON'T REORDER case COMMENT_END: /*commentendloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Comment end dash state Consume the next input * character: */ switch (c) { case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the comment * token. 
*/ EmitComment(2, pos); /* * Switch to the data state. */ goto continueStateloop; case '-': /* U+002D HYPHEN-MINUS (-) Parse error. */ /* * Append a U+002D HYPHEN-MINUS (-) character to * the comment token's data. */ AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c); /* * Stay in the comment end state. */ continue; case '\r': AdjustDoubleHyphenAndAppendToLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AdjustDoubleHyphenAndAppendToLongStrBufLineFeed(); goto continueStateloop; case '!': ErrHyphenHyphenBang(); AppendLongStrBuf(c); goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Append two U+002D HYPHEN-MINUS (-) characters * and the input character to the comment * token's data. */ AdjustDoubleHyphenAndAppendToLongStrBufAndErr(c); /* * Switch to the comment state. */ goto continueStateloop; } } // XXX reorder point case COMMENT_END_BANG: for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Comment end bang state * * Consume the next input character: */ switch (c) { case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the comment * token. */ EmitComment(3, pos); /* * Switch to the data state. */ goto continueStateloop; case '-': /* * Append two U+002D HYPHEN-MINUS (-) characters * and a U+0021 EXCLAMATION MARK (!) character * to the comment token's data. */ AppendLongStrBuf(c); /* * Switch to the comment end dash state. */ goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append two U+002D HYPHEN-MINUS * (-) characters, a U+0021 EXCLAMATION MARK (!) * character, and the input character to the * comment token's data. Switch to the comment * state. */ AppendLongStrBuf(c); /* * Switch to the comment state. 
*/ goto continueStateloop; } } // XXX reorder point case COMMENT_START_DASH: if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Comment start dash state * * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Switch to the comment end * state */ AppendLongStrBuf(c); goto continueStateloop; case '>': ErrPrematureEndOfComment(); /* Emit the comment token. */ EmitComment(1, pos); /* * Switch to the data state. */ goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Append a U+002D HYPHEN-MINUS character (-) and * the current input character to the comment * token's data. */ AppendLongStrBuf(c); /* * Switch to the comment state. */ goto continueStateloop; } // XXX reorder point case CDATA_START: for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); if (index < 6) { // CDATA_LSQB.Length if (c == Tokenizer.CDATA_LSQB[index]) { AppendLongStrBuf(c); } else { ErrBogusComment(); reconsume = true; goto continueStateloop; } index++; continue; } else { cstart = pos; // start coalescing reconsume = true; break; // FALL THROUGH goto continueStateloop; } } goto case CDATA_SECTION; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case CDATA_SECTION: /*cdatasectionloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } switch (c) { case ']': FlushChars(buf, pos); goto breakCdatasectionloop; // FALL THROUGH case '\u0000': EmitReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; // fall thru default: continue; } } breakCdatasectionloop: goto case CDATA_RSQB; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case CDATA_RSQB: /*cdatarsqb:*/ for (; ; ) { 
if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); switch (c) { case ']': goto breakCdatarsqb; default: TokenHandler.Characters(Tokenizer.RSQB_RSQB, 0, 1); cstart = pos; reconsume = true; goto continueStateloop; } } breakCdatarsqb: goto case CDATA_RSQB_RSQB; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case CDATA_RSQB_RSQB: if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); switch (c) { case '>': cstart = pos + 1; goto continueStateloop; default: TokenHandler.Characters(Tokenizer.RSQB_RSQB, 0, 2); cstart = pos; reconsume = true; goto continueStateloop; } // XXX reorder point case ATTRIBUTE_VALUE_SINGLE_QUOTED: /*attributevaluesinglequotedloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '\'': /* * U+0027 APOSTROPHE (') Switch to the after * attribute value (quoted) state. */ AddAttributeWithValue(); goto continueStateloop; case '&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in attribute value state, with the * + additional allowed character being U+0027 * APOSTROPHE ('). */ ClearStrBufAndAppend(c); SetAdditionalAndRememberAmpersandLocation('\''); returnState = state; goto breakAttributevaluesinglequotedloop; // goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; goto default; // fall thru default: /* * Anything else Append the current input * character to the current attribute's value. */ AppendLongStrBuf(c); /* * Stay in the attribute value (double-quoted) * state. 
*/ continue; } } breakAttributevaluesinglequotedloop: goto case CONSUME_CHARACTER_REFERENCE; // FALLTHRU DON'T REORDER case CONSUME_CHARACTER_REFERENCE: if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); if (c == '\u0000') { goto breakStateloop; } /* * Unlike the definition is the spec, this state does not * return a value and never requires the caller to * backtrack. This state takes care of emitting characters * or appending to the current attribute value. It also * takes care of that in the case when consuming the * character reference fails. */ /* * This section defines how to consume a character * reference. This definition is used when parsing character * references in text and in attributes. * * The behavior depends on the identity of the next * character (the one immediately after the U+0026 AMPERSAND * character): */ switch (c) { case ' ': case '\t': case '\n': case '\r': // we'll reconsume! case '\u000C': case '<': case '&': EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; case '#': /* * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER * SIGN. */ AppendStrBuf('#'); goto continueStateloop; default: if (c == additional) { EmitOrAppendStrBuf(returnState); reconsume = true; goto continueStateloop; } if (c >= 'a' && c <= 'z') { firstCharKey = c - 'a' + 26; } else if (c >= 'A' && c <= 'Z') { firstCharKey = c - 'A'; } else { // No match /* * If no match can be made, then this is a parse * error. 
*/ ErrNoNamedCharacterMatch(); EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; } // Didn't fail yet AppendStrBuf(c); // FALL THROUGH goto continueStateloop; break; } goto case CHARACTER_REFERENCE_HILO_LOOKUP; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case CHARACTER_REFERENCE_HILO_LOOKUP: { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); if (c == '\u0000') { goto breakStateloop; } /* * The data structure is as follows: * * HILO_ACCEL is a two-dimensional int array whose major * index corresponds to the second character of the * character reference (code point as index) and the * minor index corresponds to the first character of the * character reference (packed so that A-Z runs from 0 * to 25 and a-z runs from 26 to 51). This layout makes * it easier to use the sparseness of the data structure * to omit parts of it: The second dimension of the * table is null when no character reference starts with * the character corresponding to that row. * * The int value HILO_ACCEL (by these indeces) is zero * if there exists no character reference starting with * that two-letter prefix. Otherwise, the value is an * int that packs two shorts so that the higher short is * the index of the highest character reference name * with that prefix in NAMES and the lower short * corresponds to the index of the lowest character * reference name with that prefix. (It happens that the * first two character reference names share their * prefix so the packed int cannot be 0 by packing the * two shorts.) * * NAMES is an array of byte arrays where each byte * array encodes the name of a character references as * ASCII. The names omit the first two letters of the * name. (Since storing the first two letters would be * redundant with the data contained in HILO_ACCEL.) The * entries are lexically sorted. 
* * For a given index in NAMES, the same index in VALUES * contains the corresponding expansion as an array of * two UTF-16 code units (either the character and * U+0000 or a suggogate pair). */ int hilo = 0; if (c <= 'z') { int[] row = NamedCharactersAccel.HILO_ACCEL[c]; if (row != null) { hilo = row[firstCharKey]; } } if (hilo == 0) { /* * If no match can be made, then this is a parse * error. */ ErrNoNamedCharacterMatch(); EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; } // Didn't fail yet AppendStrBuf(c); lo = hilo & 0xFFFF; hi = hilo >> 16; entCol = -1; candidate = -1; strBufMark = 0; // FALL THROUGH goto continueStateloop; goto case CHARACTER_REFERENCE_TAIL; } case CHARACTER_REFERENCE_TAIL: /*outer:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); if (c == '\u0000') { goto breakStateloop; } entCol++; /* * Consume the maximum number of characters possible, * with the consumed characters matching one of the * identifiers in the first column of the named * character references table (in a case-sensitive * manner). */ /*loloop:*/ for (; ; ) { if (hi < lo) { goto breakOuter; } if (entCol == NamedCharacters.NAMES[lo].Length) { candidate = lo; strBufMark = strBufLen; lo++; } else if (entCol > NamedCharacters.NAMES[lo].Length) { goto breakOuter; } else if (c > NamedCharacters.NAMES[lo][entCol]) { lo++; } else { goto breakLoloop; } } breakLoloop: /*hiloop:*/ for (; ; ) { if (hi < lo) { goto breakOuter; } if (entCol == NamedCharacters.NAMES[hi].Length) { goto breakHiloop; } if (entCol > NamedCharacters.NAMES[hi].Length) { goto breakOuter; } else if (c < NamedCharacters.NAMES[hi][entCol]) { hi--; } else { goto breakHiloop; } } breakHiloop: if (hi < lo) { goto breakOuter; } AppendStrBuf(c); continue; } breakOuter: if (candidate == -1) { // reconsume deals with CR, LF or nul /* * If no match can be made, then this is a parse error. 
*/ ErrNoNamedCharacterMatch(); EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; } else { // c can't be CR, LF or nul if we got here string candidateName = NamedCharacters.NAMES[candidate]; if (candidateName.Length == 0 || candidateName[candidateName.Length - 1] != ';') { /* * If the last character matched is not a U+003B * SEMICOLON (;), there is a parse error. */ if ((returnState & DATA_AND_RCDATA_MASK) != 0) { /* * If the entity is being consumed as part of an * attribute, and the last character matched is * not a U+003B SEMICOLON (;), */ char ch; if (strBufMark == strBufLen) { ch = c; } else { // if (strBufOffset != -1) { // ch = buf[strBufOffset + strBufMark]; // } else { ch = strBuf[strBufMark]; // } } if (ch == '=' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { /* * and the next character is either a U+003D * EQUALS SIGN character (=) or in the range * U+0030 DIGIT ZERO to U+0039 DIGIT NINE, * U+0041 LATIN CAPITAL LETTER A to U+005A * LATIN CAPITAL LETTER Z, or U+0061 LATIN * SMALL LETTER A to U+007A LATIN SMALL * LETTER Z, then, for historical reasons, * all the characters that were matched * after the U+0026 AMPERSAND (&) must be * unconsumed, and nothing is returned. */ ErrNoNamedCharacterMatch(); AppendStrBufToLongStrBuf(); reconsume = true; goto continueStateloop; } } if ((returnState & DATA_AND_RCDATA_MASK) != 0) { ErrUnescapedAmpersandInterpretedAsCharacterReference(); } else { ErrNotSemicolonTerminated(); } } /* * Otherwise, return a character token for the character * corresponding to the entity name (as given by the * second column of the named character references * table). */ char[] val = NamedCharacters.VALUES[candidate]; if (val[1] == 0) { EmitOrAppendOne(val, returnState); } else { EmitOrAppendTwo(val, returnState); } // this is so complicated! 
if (strBufMark < strBufLen) { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { for (int i = strBufMark; i < strBufLen; i++) { AppendLongStrBuf(strBuf[i]); } } else { TokenHandler.Characters(strBuf, strBufMark, strBufLen - strBufMark); } // } } if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; /* * If the markup contains I'm ¬it; I tell you, the * entity is parsed as "not", as in, I'm ¬it; I tell * you. But if the markup was I'm ∉ I tell you, * the entity would be parsed as "notin;", resulting in * I'm ∉ I tell you. */ } // XXX reorder point case CONSUME_NCR: if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); prevValue = -1; value = 0; seenDigits = false; /* * The behavior further depends on the character after the * U+0023 NUMBER SIGN: */ switch (c) { case 'x': case 'X': /* * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL * LETTER X Consume the X. * * Follow the steps below, but using the range of * characters U+0030 DIGIT ZERO through to U+0039 * DIGIT NINE, U+0061 LATIN SMALL LETTER A through * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL * LETTER F (in other words, 0-9, A-F, a-f). * * When it comes to interpreting the number, * interpret it as a hexadecimal number. */ AppendStrBuf(c); goto continueStateloop; default: /* * Anything else Follow the steps below, but using * the range of characters U+0030 DIGIT ZERO through * to U+0039 DIGIT NINE (i.e. just 0-9). * * When it comes to interpreting the number, * interpret it as a decimal number. 
*/ reconsume = true; // FALL THROUGH goto continueStateloop; break; } // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER goto case DECIMAL_NRC_LOOP; case DECIMAL_NRC_LOOP: /*decimalloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } // Deal with overflow gracefully if (value < prevValue) { value = 0x110000; // Value above Unicode range but // within int // range } prevValue = value; /* * Consume as many characters as match the range of * characters given above. */ if (c >= '0' && c <= '9') { seenDigits = true; value *= 10; value += c - '0'; continue; } else if (c == ';') { if (seenDigits) { if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos + 1; } // FALL THROUGH goto continueStateloop; goto breakDecimalloop; } else { ErrNoDigitsInNCR(); AppendStrBuf(';'); EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos + 1; } goto continueStateloop; } } else { /* * If no characters match the range, then don't * consume any characters (and unconsume the U+0023 * NUMBER SIGN character and, if appropriate, the X * character). This is a parse error; nothing is * returned. * * Otherwise, if the next character is a U+003B * SEMICOLON, consume that too. If it isn't, there * is a parse error. 
*/ if (!seenDigits) { ErrNoDigitsInNCR(); EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; } else { ErrCharRefLacksSemicolon(); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; // FALL THROUGH goto continueStateloop; goto breakDecimalloop; } } } breakDecimalloop: goto case HANDLE_NCR_VALUE; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case HANDLE_NCR_VALUE: // WARNING previous state sets reconsume // XXX inline this case if the method size can take it HandleNcrValue(returnState); goto continueStateloop; // XXX reorder point case HEX_NCR_LOOP: for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); // Deal with overflow gracefully if (value < prevValue) { value = 0x110000; // Value above Unicode range but // within int // range } prevValue = value; /* * Consume as many characters as match the range of * characters given above. */ if (c >= '0' && c <= '9') { seenDigits = true; value *= 16; value += c - '0'; continue; } else if (c >= 'A' && c <= 'F') { seenDigits = true; value *= 16; value += c - 'A' + 10; continue; } else if (c >= 'a' && c <= 'f') { seenDigits = true; value *= 16; value += c - 'a' + 10; continue; } else if (c == ';') { if (seenDigits) { if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos + 1; } goto continueStateloop; } else { ErrNoDigitsInNCR(); AppendStrBuf(';'); EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos + 1; } goto continueStateloop; } } else { /* * If no characters match the range, then don't * consume any characters (and unconsume the U+0023 * NUMBER SIGN character and, if appropriate, the X * character). This is a parse error; nothing is * returned. * * Otherwise, if the next character is a U+003B * SEMICOLON, consume that too. If it isn't, there * is a parse error. 
*/ if (!seenDigits) { ErrNoDigitsInNCR(); EmitOrAppendStrBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; } else { ErrCharRefLacksSemicolon(); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } reconsume = true; goto continueStateloop; } } } // XXX reorder point case PLAINTEXT: /*plaintextloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } switch (c) { case '\u0000': EmitPlaintextReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Stay in the * RAWTEXT state. */ continue; } } // XXX reorder point case CLOSE_TAG_OPEN: if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Otherwise, if the content model flag is set to the PCDATA * state, or if the next few characters do match that tag * name, consume the next input character: */ switch (c) { case '>': /* U+003E GREATER-THAN SIGN (>) Parse error. */ ErrLtSlashGt(); /* * Switch to the data state. */ cstart = pos + 1; goto continueStateloop; case '\r': SilentCarriageReturn(); /* Anything else Parse error. */ ErrGarbageAfterLtSlash(); /* * Switch to the bogus comment state. */ ClearLongStrBufAndAppend('\n'); goto breakStateloop; case '\n': SilentLineFeed(); /* Anything else Parse error. */ ErrGarbageAfterLtSlash(); /* * Switch to the bogus comment state. 
*/ ClearLongStrBufAndAppend('\n'); goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: if (c >= 'A' && c <= 'Z') { c += (char)0x20; } if (c >= 'a' && c <= 'z') { /* * U+0061 LATIN SMALL LETTER A through to U+007A * LATIN SMALL LETTER Z Create a new end tag * token, */ endTag = true; /* * set its tag name to the input character, */ ClearStrBufAndAppend(c); /* * then switch to the tag name state. (Don't * emit the token yet; further details will be * filled in before it is emitted.) */ goto continueStateloop; } else { /* Anything else Parse error. */ ErrGarbageAfterLtSlash(); /* * Switch to the bogus comment state. */ ClearLongStrBufAndAppend(c); goto continueStateloop; } } // XXX reorder point case RCDATA: /*rcdataloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } switch (c) { case '&': /* * U+0026 AMPERSAND (&) Switch to the character * reference in RCDATA state. */ FlushChars(buf, pos); ClearStrBufAndAppend(c); additional = '\u0000'; returnState = state; goto continueStateloop; case '<': /* * U+003C LESS-THAN SIGN (<) Switch to the * RCDATA less-than sign state. */ FlushChars(buf, pos); returnState = state; goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Emit the current input character as a * character token. Stay in the RCDATA state. */ continue; } } // XXX reorder point case RAWTEXT: /*rawtextloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } switch (c) { case '<': /* * U+003C LESS-THAN SIGN (<) Switch to the * RAWTEXT less-than sign state. 
*/ FlushChars(buf, pos); returnState = state; goto breakRawtextloop; // FALL THRU goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Emit the current input character as a * character token. Stay in the RAWTEXT state. */ continue; } } breakRawtextloop: goto case RAWTEXT_RCDATA_LESS_THAN_SIGN; // XXX fallthru don't reorder case RAWTEXT_RCDATA_LESS_THAN_SIGN: /*rawtextrcdatalessthansignloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); switch (c) { case '/': /* * U+002F SOLIDUS (/) Set the temporary buffer * to the empty string. Switch to the script * data end tag open state. */ index = 0; ClearStrBuf(); goto breakRawtextrcdatalessthansignloop; // FALL THRU goto continueStateloop; default: /* * Otherwise, emit a U+003C LESS-THAN SIGN * character token */ TokenHandler.Characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in * the data state. */ cstart = pos; reconsume = true; goto continueStateloop; } } breakRawtextrcdatalessthansignloop: goto case NON_DATA_END_TAG_NAME; // XXX fall thru. don't reorder. case NON_DATA_END_TAG_NAME: for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * ASSERT! when entering this state, set index to 0 and * call clearStrBuf() assert (contentModelElement != * null); Let's implement the above without lookahead. * strBuf is the 'temporary buffer'. 
*/ if (index < endTagExpectationAsArray.Length) { char e = endTagExpectationAsArray[index]; char folded = c; if (c >= 'A' && c <= 'Z') { folded += (char)0x20; } if (folded != e) { TokenHandler.Characters(Tokenizer.LT_SOLIDUS, 0, 2); EmitStrBuf(); cstart = pos; reconsume = true; goto continueStateloop; } AppendStrBuf(c); index++; continue; } else { endTag = true; // XXX replace contentModelElement with different // type tagName = endTagExpectation; switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; // fall thru case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE * FEED (LF) U+000C FORM FEED (FF) U+0020 * SPACE If the current end tag token is an * appropriate end tag token, then switch to * the before attribute name state. */ goto continueStateloop; case '/': /* * U+002F SOLIDUS (/) If the current end tag * token is an appropriate end tag token, * then switch to the self-closing start tag * state. */ goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) If the * current end tag token is an appropriate * end tag token, then emit the current tag * token and switch to the data state. */ if (shouldSuspend) { goto breakStateloop; } goto continueStateloop; default: /* * Emit a U+003C LESS-THAN SIGN character * token, a U+002F SOLIDUS character token, * a character token for each of the * characters in the temporary buffer (in * the order they were added to the buffer), * and reconsume the current input character * in the RAWTEXT state. 
*/ TokenHandler.Characters( Tokenizer.LT_SOLIDUS, 0, 2); EmitStrBuf(); if (c == '\u0000') { EmitReplacementCharacter(buf, pos); } else { cstart = pos; // don't drop the // character } goto continueStateloop; } } } // XXX reorder point // BEGIN HOTSPOT WORKAROUND case BOGUS_COMMENT: /*boguscommentloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume every character up to and including the first * U+003E GREATER-THAN SIGN character (>) or the end of * the file (EOF), whichever comes first. Emit a comment * token whose data is the concatenation of all the * characters starting from and including the character * that caused the state machine to switch into the * bogus comment state, up to and including the * character immediately before the last consumed * character (i.e. up to the character just before the * U+003E or EOF character). (If the comment was started * by the end of the file (EOF), the token is empty.) * * Switch to the data state. * * If the end of the file was reached, reconsume the EOF * character. 
*/ switch (c) { case '>': EmitComment(0, pos); goto continueStateloop; case '-': AppendLongStrBuf(c); goto breakBoguscommentloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: AppendLongStrBuf(c); continue; } } breakBoguscommentloop: goto case BOGUS_COMMENT_HYPHEN; // FALLTHRU DON'T REORDER case BOGUS_COMMENT_HYPHEN: /*boguscommenthyphenloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); switch (c) { case '>': EmitComment(0, pos); goto continueStateloop; case '-': AppendSecondHyphenToBogusComment(); goto continueBoguscommenthyphenloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: AppendLongStrBuf(c); goto continueStateloop; } continueBoguscommenthyphenloop: continue; } // XXX reorder point case SCRIPT_DATA: /*scriptdataloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } switch (c) { case '<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data less-than sign state. */ FlushChars(buf, pos); returnState = state; goto breakScriptdataloop; // FALL THRU continue // stateloop; case '\u0000': EmitReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Stay in the * script data state. 
*/ continue; } } breakScriptdataloop: goto case SCRIPT_DATA_LESS_THAN_SIGN; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_LESS_THAN_SIGN: /*scriptdatalessthansignloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); switch (c) { case '/': /* * U+002F SOLIDUS (/) Set the temporary buffer * to the empty string. Switch to the script * data end tag open state. */ index = 0; ClearStrBuf(); goto continueStateloop; case '!': TokenHandler.Characters(Tokenizer.LT_GT, 0, 1); cstart = pos; goto breakScriptdatalessthansignloop; // FALL THRU // continue // stateloop; default: /* * Otherwise, emit a U+003C LESS-THAN SIGN * character token */ TokenHandler.Characters(Tokenizer.LT_GT, 0, 1); /* * and reconsume the current input character in * the data state. */ cstart = pos; reconsume = true; goto continueStateloop; } } breakScriptdatalessthansignloop: goto case SCRIPT_DATA_ESCAPE_START; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_ESCAPE_START: /*scriptdataescapestartloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data escape start dash state. */ goto breakScriptdataescapestartloop; // FALL THRU // continue // stateloop; default: /* * Anything else Reconsume the current input * character in the script data state. */ reconsume = true; goto continueStateloop; } } breakScriptdataescapestartloop: goto case SCRIPT_DATA_ESCAPE_START_DASH; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_ESCAPE_START_DASH: /*scriptdataescapestartdashloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. 
Switch to the * script data escaped dash dash state. */ goto breakScriptdataescapestartdashloop; // goto continueStateloop; default: /* * Anything else Reconsume the current input * character in the script data state. */ reconsume = true; goto continueStateloop; } } breakScriptdataescapestartdashloop: goto case SCRIPT_DATA_ESCAPED_DASH_DASH; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_ESCAPED_DASH_DASH: /*scriptdataescapeddashdashloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Stay in the * script data escaped dash dash state. */ continue; case '<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data escaped less-than sign state. */ FlushChars(buf, pos); goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit a U+003E * GREATER-THAN SIGN character token. Switch to * the script data state. */ goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); goto breakScriptdataescapeddashdashloop; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data escaped state. */ goto breakScriptdataescapeddashdashloop; // goto continueStateloop; } } breakScriptdataescapeddashdashloop: goto case SCRIPT_DATA_ESCAPED; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_ESCAPED: /*scriptdataescapedloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data escaped dash state. 
*/ goto breakScriptdataescapedloop; // FALL THRU // continue // stateloop; case '<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data escaped less-than sign state. */ FlushChars(buf, pos); goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Stay in the * script data escaped state. */ continue; } } breakScriptdataescapedloop: goto case SCRIPT_DATA_ESCAPED_DASH; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_ESCAPED_DASH: /*scriptdataescapeddashloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data escaped dash dash state. */ goto continueStateloop; case '<': /* * U+003C LESS-THAN SIGN (<) Switch to the * script data escaped less-than sign state. */ FlushChars(buf, pos); goto breakScriptdataescapeddashloop; // goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); goto continueStateloop; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data escaped state. */ goto continueStateloop; } } breakScriptdataescapeddashloop: goto case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: /*scriptdataescapedlessthanloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '/': /* * U+002F SOLIDUS (/) Set the temporary buffer * to the empty string. 
Switch to the script * data escaped end tag open state. */ index = 0; ClearStrBuf(); returnState = Tokenizer.SCRIPT_DATA_ESCAPED; goto continueStateloop; case 'S': case 's': /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Emit a U+003C * LESS-THAN SIGN character token and the * current input character as a character token. */ TokenHandler.Characters(Tokenizer.LT_GT, 0, 1); cstart = pos; index = 1; /* * Set the temporary buffer to the empty string. * Append the lowercase version of the current * input character (add 0x0020 to the * character's code point) to the temporary * buffer. Switch to the script data double * escape start state. */ goto breakScriptdataescapedlessthanloop; // goto continueStateloop; default: /* * Anything else Emit a U+003C LESS-THAN SIGN * character token and reconsume the current * input character in the script data escaped * state. */ TokenHandler.Characters(Tokenizer.LT_GT, 0, 1); cstart = pos; reconsume = true; goto continueStateloop; } } breakScriptdataescapedlessthanloop: goto case SCRIPT_DATA_DOUBLE_ESCAPE_START; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_DOUBLE_ESCAPE_START: /*scriptdatadoubleescapestartloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); Debug.Assert(index > 0); if (index < 6) { // SCRIPT_ARR.Length char folded = c; if (c >= 'A' && c <= 'Z') { folded += (char)0x20; } if (folded != Tokenizer.SCRIPT_ARR[index]) { reconsume = true; goto continueStateloop; } index++; continue; } switch (c) { case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; case ' ': case '\t': case '\u000C': case '/': case '>': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN * (>) Emit the current input character as a * character token. 
If the temporary buffer is * the string "script", then switch to the * script data double escaped state. */ goto breakScriptdatadoubleescapestartloop; // goto continueStateloop; default: /* * Anything else Reconsume the current input * character in the script data escaped state. */ reconsume = true; goto continueStateloop; } } breakScriptdatadoubleescapestartloop: goto case SCRIPT_DATA_DOUBLE_ESCAPED; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_DOUBLE_ESCAPED: /*scriptdatadoubleescapedloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data double escaped dash state. */ goto breakScriptdatadoubleescapedloop; // FALL THRU // continue // stateloop; case '<': /* * U+003C LESS-THAN SIGN (<) Emit a U+003C * LESS-THAN SIGN character token. Switch to the * script data double escaped less-than sign * state. */ goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); continue; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Stay in the * script data double escaped state. */ continue; } } breakScriptdatadoubleescapedloop: goto case SCRIPT_DATA_DOUBLE_ESCAPED_DASH; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: /*scriptdatadoubleescapeddashloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Switch to the * script data double escaped dash dash state. 
*/ goto breakScriptdatadoubleescapeddashloop; // goto continueStateloop; case '<': /* * U+003C LESS-THAN SIGN (<) Emit a U+003C * LESS-THAN SIGN character token. Switch to the * script data double escaped less-than sign * state. */ goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); goto continueStateloop; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data double escaped state. */ goto continueStateloop; } } breakScriptdatadoubleescapeddashloop: goto case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: /*scriptdatadoubleescapeddashdashloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '-': /* * U+002D HYPHEN-MINUS (-) Emit a U+002D * HYPHEN-MINUS character token. Stay in the * script data double escaped dash dash state. */ continue; case '<': /* * U+003C LESS-THAN SIGN (<) Emit a U+003C * LESS-THAN SIGN character token. Switch to the * script data double escaped less-than sign * state. */ goto breakScriptdatadoubleescapeddashdashloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit a U+003E * GREATER-THAN SIGN character token. Switch to * the script data state. */ goto continueStateloop; case '\u0000': EmitReplacementCharacter(buf, pos); goto continueStateloop; case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto default; default: /* * Anything else Emit the current input * character as a character token. Switch to the * script data double escaped state. 
*/ goto continueStateloop; } } breakScriptdatadoubleescapeddashdashloop: goto case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: /*scriptdatadoubleescapedlessthanloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '/': /* * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS * character token. Set the temporary buffer to * the empty string. Switch to the script data * double escape end state. */ index = 0; goto breakScriptdatadoubleescapedlessthanloop; default: /* * Anything else Reconsume the current input * character in the script data double escaped * state. */ reconsume = true; goto continueStateloop; } } breakScriptdatadoubleescapedlessthanloop: goto case SCRIPT_DATA_DOUBLE_ESCAPE_END; // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER case SCRIPT_DATA_DOUBLE_ESCAPE_END: /*scriptdatadoubleescapeendloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); if (index < 6) { // SCRIPT_ARR.Length char folded = c; if (c >= 'A' && c <= 'Z') { folded += (char)0x20; } if (folded != Tokenizer.SCRIPT_ARR[index]) { reconsume = true; goto continueStateloop; } index++; continue; } switch (c) { case '\r': EmitCarriageReturn(buf, pos); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; case ' ': case '\t': case '\u000C': case '/': case '>': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN * (>) Emit the current input character as a * character token. If the temporary buffer is * the string "script", then switch to the * script data escaped state. */ goto continueStateloop; default: /* * Reconsume the current input character in the * script data double escaped state. 
*/ reconsume = true; goto continueStateloop; } } // XXX reorder point case MARKUP_DECLARATION_OCTYPE: /*markupdeclarationdoctypeloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); if (index < 6) { // OCTYPE.Length char folded = c; if (c >= 'A' && c <= 'Z') { folded += (char)0x20; } if (folded == Tokenizer.OCTYPE[index]) { AppendLongStrBuf(c); } else { ErrBogusComment(); reconsume = true; goto continueStateloop; } index++; continue; } else { reconsume = true; goto breakMarkupdeclarationdoctypeloop; // goto continueStateloop; } } breakMarkupdeclarationdoctypeloop: goto case DOCTYPE; // FALLTHRU DON'T REORDER case DOCTYPE: /*doctypeloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } InitDoctypeFields(); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; // fall thru case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before DOCTYPE name state. */ goto breakDoctypeloop; // goto continueStateloop; default: /* * Anything else Parse error. */ ErrMissingSpaceBeforeDoctypeName(); /* * Reconsume the current character in the before * DOCTYPE name state. 
*/ reconsume = true; goto breakDoctypeloop; // goto continueStateloop; } } breakDoctypeloop: goto case BEFORE_DOCTYPE_NAME; // FALLTHRU DON'T REORDER case BEFORE_DOCTYPE_NAME: /*beforedoctypenameloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before DOCTYPE name state. */ continue; case '>': /* * U+003E GREATER-THAN SIGN (>) Parse error. */ ErrNamelessDoctype(); /* * Create a new DOCTYPE token. Set its * force-quirks flag to on. */ forceQuirks = true; /* * Emit the token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: if (c >= 'A' && c <= 'Z') { /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Create a * new DOCTYPE token. Set the token's name * to the lowercase version of the input * character (add 0x0020 to the character's * code point). */ c += (char)0x20; } /* Anything else Create a new DOCTYPE token. */ /* * Set the token's name name to the current * input character. */ ClearStrBufAndAppend(c); /* * Switch to the DOCTYPE name state. 
*/ goto breakBeforedoctypenameloop; // goto continueStateloop; } } breakBeforedoctypenameloop: goto case DOCTYPE_NAME; // FALLTHRU DON'T REORDER case DOCTYPE_NAME: /*doctypenameloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); StrBufToDoctypeName(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the after DOCTYPE name state. */ StrBufToDoctypeName(); goto breakDoctypenameloop; // goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token. */ StrBufToDoctypeName(); EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * U+0041 LATIN CAPITAL LETTER A through to * U+005A LATIN CAPITAL LETTER Z Append the * lowercase version of the input character (add * 0x0020 to the character's code point) to the * current DOCTYPE token's name. */ if (c >= 'A' && c <= 'Z') { c += (char)0x0020; } /* * Anything else Append the current input * character to the current DOCTYPE token's * name. */ AppendStrBuf(c); /* * Stay in the DOCTYPE name state. */ continue; } } breakDoctypenameloop: goto case AFTER_DOCTYPE_NAME; // FALLTHRU DON'T REORDER case AFTER_DOCTYPE_NAME: /*afterdoctypenameloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the after DOCTYPE name state. 
*/ continue; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case 'p': case 'P': index = 0; goto breakAfterdoctypenameloop; // goto continueStateloop; case 's': case 'S': index = 0; goto continueStateloop; default: /* * Otherwise, this is the parse error. */ BogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state. */ goto continueStateloop; } } breakAfterdoctypenameloop: goto case DOCTYPE_UBLIC; // FALLTHRU DON'T REORDER case DOCTYPE_UBLIC: /*doctypeublicloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * If the six characters starting from the current input * character are an ASCII case-insensitive match for the * word "PUBLIC", then consume those characters and * switch to the before DOCTYPE public identifier state. */ if (index < 5) { // UBLIC.Length char folded = c; if (c >= 'A' && c <= 'Z') { folded += (char)0x20; } if (folded != Tokenizer.UBLIC[index]) { BogusDoctype(); // forceQuirks = true; reconsume = true; goto continueStateloop; } index++; continue; } else { reconsume = true; goto breakDoctypeublicloop; // goto continueStateloop; } } breakDoctypeublicloop: goto case AFTER_DOCTYPE_PUBLIC_KEYWORD; // FALLTHRU DON'T REORDER case AFTER_DOCTYPE_PUBLIC_KEYWORD: /*afterdoctypepublickeywordloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before DOCTYPE public * identifier state. 
*/ goto breakAfterdoctypepublickeywordloop; // FALL THROUGH continue stateloop case '"': /* * U+0022 QUOTATION MARK (") Parse Error. */ ErrNoSpaceBetweenDoctypePublicKeywordAndQuote(); /* * Set the DOCTYPE token's public identifier to * the empty string (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state. */ goto continueStateloop; case '\'': /* * U+0027 APOSTROPHE (') Parse Error. */ ErrNoSpaceBetweenDoctypePublicKeywordAndQuote(); /* * Set the DOCTYPE token's public identifier to * the empty string (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state. */ goto continueStateloop; case '>': /* U+003E GREATER-THAN SIGN (>) Parse error. */ ErrExpectedPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; default: BogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state. */ goto continueStateloop; } } breakAfterdoctypepublickeywordloop: goto case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; // FALLTHRU DON'T REORDER case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: /*beforedoctypepublicidentifierloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before DOCTYPE public identifier * state. 
*/ continue; case '"': /* * U+0022 QUOTATION MARK (") Set the DOCTYPE * token's public identifier to the empty string * (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state. */ goto breakBeforedoctypepublicidentifierloop; // goto continueStateloop; case '\'': /* * U+0027 APOSTROPHE (') Set the DOCTYPE token's * public identifier to the empty string (not * missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state. */ goto continueStateloop; case '>': /* U+003E GREATER-THAN SIGN (>) Parse error. */ ErrExpectedPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; default: BogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state. */ goto continueStateloop; } } breakBeforedoctypepublicidentifierloop: goto case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; // FALLTHRU DON'T REORDER case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: /*doctypepublicidentifierdoublequotedloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '"': /* * U+0022 QUOTATION MARK (") Switch to the after * DOCTYPE public identifier state. */ publicIdentifier = LongStrBufToString(); goto breakDoctypepublicidentifierdoublequotedloop; // goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Parse error. */ ErrGtInPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ publicIdentifier = LongStrBufToString(); EmitDoctypeToken(pos); /* * Switch to the data state. 
*/ goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * public identifier. */ AppendLongStrBuf(c); /* * Stay in the DOCTYPE public identifier * (double-quoted) state. */ continue; } } breakDoctypepublicidentifierdoublequotedloop: goto case AFTER_DOCTYPE_PUBLIC_IDENTIFIER; // FALLTHRU DON'T REORDER case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: /*afterdoctypepublicidentifierloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the between DOCTYPE public and * system identifiers state. */ goto breakAfterdoctypepublicidentifierloop; // goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case '"': /* * U+0022 QUOTATION MARK (") Parse error. */ ErrNoSpaceBetweenPublicAndSystemIds(); /* * Set the DOCTYPE token's system identifier to * the empty string (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state. */ goto continueStateloop; case '\'': /* * U+0027 APOSTROPHE (') Parse error. */ ErrNoSpaceBetweenPublicAndSystemIds(); /* * Set the DOCTYPE token's system identifier to * the empty string (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state. */ goto continueStateloop; default: BogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on. 
*/ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state. */ goto continueStateloop; } } breakAfterdoctypepublicidentifierloop: goto case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; // FALLTHRU DON'T REORDER case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: /*betweendoctypepublicandsystemidentifiersloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; // fall thru case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the between DOCTYPE public and system * identifiers state. */ continue; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case '"': /* * U+0022 QUOTATION MARK (") Set the DOCTYPE * token's system identifier to the empty string * (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state. */ goto breakBetweendoctypepublicandsystemidentifiersloop; // goto continueStateloop; case '\'': /* * U+0027 APOSTROPHE (') Set the DOCTYPE token's * system identifier to the empty string (not * missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state. */ goto continueStateloop; default: BogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state. 
*/ goto continueStateloop; } } breakBetweendoctypepublicandsystemidentifiersloop: goto case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; // FALLTHRU DON'T REORDER case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: /*doctypesystemidentifierdoublequotedloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '"': /* * U+0022 QUOTATION MARK (") Switch to the after * DOCTYPE system identifier state. */ systemIdentifier = LongStrBufToString(); goto continueStateloop; case '>': /* * U+003E GREATER-THAN SIGN (>) Parse error. */ ErrGtInSystemId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ systemIdentifier = LongStrBufToString(); EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * system identifier. */ AppendLongStrBuf(c); /* * Stay in the DOCTYPE system identifier * (double-quoted) state. */ continue; } } breakDoctypesystemidentifierdoublequotedloop: goto case AFTER_DOCTYPE_SYSTEM_IDENTIFIER; // FALLTHRU DON'T REORDER case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: /*afterdoctypesystemidentifierloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); goto case ' '; // fall thru case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the after DOCTYPE system identifier state. */ continue; case '>': /* * U+003E GREATER-THAN SIGN (>) Emit the current * DOCTYPE token. 
*/ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; default: /* * Switch to the bogus DOCTYPE state. (This does * not set the DOCTYPE token's force-quirks flag * to on.) */ BogusDoctypeWithoutQuirks(); goto breakAfterdoctypesystemidentifierloop; // goto continueStateloop; } } breakAfterdoctypesystemidentifierloop: goto case BOGUS_DOCTYPE; // FALLTHRU DON'T REORDER case BOGUS_DOCTYPE: for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '>': /* * U+003E GREATER-THAN SIGN (>) Emit that * DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto default; default: /* * Anything else Stay in the bogus DOCTYPE * state. */ continue; } } // XXX reorder point case DOCTYPE_YSTEM: /*doctypeystemloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Otherwise, if the six characters starting from the * current input character are an ASCII case-insensitive * match for the word "SYSTEM", then consume those * characters and switch to the before DOCTYPE system * identifier state. 
*/ if (index < 5) { // YSTEM.Length char folded = c; if (c >= 'A' && c <= 'Z') { folded += (char)0x20; } if (folded != Tokenizer.YSTEM[index]) { BogusDoctype(); reconsume = true; goto continueStateloop; } index++; goto continueStateloop; } else { reconsume = true; goto breakDoctypeystemloop; // goto continueStateloop; } } breakDoctypeystemloop: goto case AFTER_DOCTYPE_SYSTEM_KEYWORD; // FALLTHRU DON'T REORDER case AFTER_DOCTYPE_SYSTEM_KEYWORD: /*afterdoctypesystemkeywordloop:*/ for (; ; ) { if (reconsume) { reconsume = false; } else { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); } /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE * Switch to the before DOCTYPE public * identifier state. */ goto breakAfterdoctypesystemkeywordloop; // FALL THROUGH continue stateloop case '"': /* * U+0022 QUOTATION MARK (") Parse Error. */ ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote(); /* * Set the DOCTYPE token's system identifier to * the empty string (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state. */ goto continueStateloop; case '\'': /* * U+0027 APOSTROPHE (') Parse Error. */ ErrNoSpaceBetweenDoctypeSystemKeywordAndQuote(); /* * Set the DOCTYPE token's public identifier to * the empty string (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state. */ goto continueStateloop; case '>': /* U+003E GREATER-THAN SIGN (>) Parse error. */ ErrExpectedPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. 
*/ goto continueStateloop; default: BogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state. */ goto continueStateloop; } } breakAfterdoctypesystemkeywordloop: goto case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; // FALLTHRU DON'T REORDER case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: /*beforedoctypesystemidentifierloop:*/ for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\r': SilentCarriageReturn(); goto breakStateloop; case '\n': SilentLineFeed(); // fall thru goto case ' '; case ' ': case '\t': case '\u000C': /* * U+0009 CHARACTER TABULATION U+000A LINE FEED * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay * in the before DOCTYPE system identifier * state. */ continue; case '"': /* * U+0022 QUOTATION MARK (") Set the DOCTYPE * token's system identifier to the empty string * (not missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state. */ goto continueStateloop; case '\'': /* * U+0027 APOSTROPHE (') Set the DOCTYPE token's * system identifier to the empty string (not * missing), */ ClearLongStrBuf(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state. */ goto breakBeforedoctypesystemidentifierloop; // goto continueStateloop; case '>': /* U+003E GREATER-THAN SIGN (>) Parse error. */ ErrExpectedSystemId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; default: BogusDoctype(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ // done by bogusDoctype(); /* * Switch to the bogus DOCTYPE state. 
*/ goto continueStateloop; } } breakBeforedoctypesystemidentifierloop: goto case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; // FALLTHRU DON'T REORDER case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\'': /* * U+0027 APOSTROPHE (') Switch to the after * DOCTYPE system identifier state. */ systemIdentifier = LongStrBufToString(); goto continueStateloop; case '>': ErrGtInSystemId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ systemIdentifier = LongStrBufToString(); EmitDoctypeToken(pos); /* * Switch to the data state. */ goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * system identifier. */ AppendLongStrBuf(c); /* * Stay in the DOCTYPE system identifier * (double-quoted) state. */ continue; } } // XXX reorder point case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: for (; ; ) { if (++pos == endPos) { goto breakStateloop; } c = CheckChar(buf, pos); /* * Consume the next input character: */ switch (c) { case '\'': /* * U+0027 APOSTROPHE (') Switch to the after * DOCTYPE public identifier state. */ publicIdentifier = LongStrBufToString(); goto continueStateloop; case '>': ErrGtInPublicId(); /* * Set the DOCTYPE token's force-quirks flag to * on. */ forceQuirks = true; /* * Emit that DOCTYPE token. */ publicIdentifier = LongStrBufToString(); EmitDoctypeToken(pos); /* * Switch to the data state. 
*/ goto continueStateloop; case '\r': AppendLongStrBufCarriageReturn(); goto breakStateloop; case '\n': AppendLongStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru goto default; default: /* * Anything else Append the current input * character to the current DOCTYPE token's * public identifier. */ AppendLongStrBuf(c); /* * Stay in the DOCTYPE public identifier * (single-quoted) state. */ continue; } } // END HOTSPOT WORKAROUND } } // stateloop breakStateloop: FlushChars(buf, pos); /* * if (prevCR && pos != endPos) { // why is this needed pos--; col--; } */ // Save locals stateSave = state; returnStateSave = returnState; return pos; }
/**
 * Creates a tokenizer that reports its tokens to the given handler.
 *
 * @param tokenHandler
 *            the handler for receiving tokens
 */
public Tokenizer(ITreeBuilder tokenHandler)
{
    TokenHandler = tokenHandler;

    // One-element and two-element character scratch arrays.
    // NOTE(review): presumably for a single UTF-16 unit and a surrogate
    // pair respectively - confirm at the use sites.
    bmpChar = new char[1];
    astralChar = new char[2];

    // No tag, attribute or doctype is in progress on a fresh instance.
    tagName = null;
    attributeName = null;
    attributes = null;
    doctypeName = null;
    publicIdentifier = null;
    systemIdentifier = null;
}
/**
 * Hands the tag currently being built to the token handler as either a
 * start tag or an end tag, then clears the per-tag fields.
 *
 * @param selfClosing
 *            whether the tag was written with a trailing solidus
 * @param pos
 *            position of the character that closed the tag; cstart is
 *            set to the position right after it
 * @return the value of stateSave after the handler has been notified
 */
private int EmitCurrentTagToken(bool selfClosing, int pos)
{
    cstart = pos + 1;
    MaybeErrSlashInEndTag(selfClosing);
    stateSave = Tokenizer.DATA;

    // Never pass a null attribute holder on to the handler.
    HtmlAttributes tokenAttributes = attributes;
    if (tokenAttributes == null)
    {
        tokenAttributes = HtmlAttributes.EMPTY_ATTRIBUTES;
    }

    if (!endTag)
    {
        TokenHandler.StartTag(tagName, tokenAttributes, selfClosing);
    }
    else
    {
        /*
         * When an end tag token is emitted, the content model flag must be
         * switched to the PCDATA state.
         */
        MaybeErrAttributesOnEndTag(tokenAttributes);
        TokenHandler.EndTag(tagName);
    }

    tagName = null;
    ResetAttributes();

    /*
     * stateSave is re-read here rather than returning DATA directly: the
     * token handler may have called SetStateAndEndTagExpectation during
     * the callback above and changed it.
     */
    return stateSave;
}
/**
 * Puts the tokenizer into the given state and records the element whose
 * end tag is expected. Intended only for states that carry a special end
 * tag expectation.
 *
 * @param specialTokenizerState
 *            the tokenizer state to set
 * @param endTagExpectation
 *            the expected end tag for transitioning back to normal
 */
public void SetStateAndEndTagExpectation(int specialTokenizerState, ElementName endTagExpectation)
{
    this.endTagExpectation = endTagExpectation;
    stateSave = specialTokenizerState;

    // Refresh the array form derived from the expectation just stored.
    EndTagExpectationToArray();
}
// For the token handler to call
/**
 * Sets the tokenizer state and the associated element name. This should
 * only ever be used to put the tokenizer into one of the states that have
 * a special end tag expectation.
 *
 * When the requested state is Tokenizer.DATA only the state is stored;
 * the end tag expectation is left untouched and the name is not examined.
 *
 * @param specialTokenizerState
 *            the tokenizer state to set
 * @param endTagExpectation
 *            the name of the expected end tag for transitioning back to
 *            normal; must not be null unless the state is Tokenizer.DATA
 */
public void SetStateAndEndTagExpectation(int specialTokenizerState, String endTagExpectation)
{
    this.stateSave = specialTokenizerState;
    if (specialTokenizerState == Tokenizer.DATA)
    {
        return;
    }

    // Resolve the name through the same buffer lookup that
    // StrBufToElementNameString uses, instead of constructing a fresh
    // ElementName from the string (which left this array unused).
    // NOTE(review): presumably the lookup yields the predefined element
    // entry that EndTagExpectationToArray relies on - confirm against
    // ElementName.
    char[] asArray = endTagExpectation.ToCharArray();
    this.endTagExpectation = ElementName.ElementNameByBuffer(asArray, 0, asArray.Length);
    EndTagExpectationToArray();
}
/**
 * Returns the tokenizer to the DATA state: empties both string buffers,
 * restores the scalar working fields to their initial values,
 * re-initializes the doctype fields and drops any tag, attribute name or
 * attribute holder that was in progress.
 */
public void ResetToDataState()
{
    // Both buffers become logically empty; the arrays themselves are kept.
    strBufLen = 0;
    longStrBufLen = 0;
    strBufMark = 0;

    stateSave = Tokenizer.DATA;
    // line = 1; XXX line numbers
    lastCR = false;
    shouldSuspend = false;

    index = 0;
    forceQuirks = false;
    endTag = false;
    additional = '\u0000';

    entCol = -1;
    firstCharKey = -1;
    lo = 0;
    hi = 0; // will always be overwritten before use anyway
    candidate = -1;

    prevValue = -1;
    value = 0;
    seenDigits = false;

    InitDoctypeFields();

    // Unconditional assignment has the same effect as the guarded form.
    tagName = null;
    attributeName = null;
    attributes = null;
}
/**
 * Converts the first strBufLen characters of strBuf into an element name
 * via ElementName.ElementNameByBuffer and stores the result in tagName.
 */
private void StrBufToElementNameString()
{
    const int bufferStart = 0;
    var resolvedName = ElementName.ElementNameByBuffer(strBuf, bufferStart, strBufLen);
    tagName = resolvedName;
}