示例#1
0
 /// <summary>
 /// Routes the string buffer according to <paramref name="returnState"/>.
 /// The state value is cast to a byte and tested against
 /// <c>DATA_AND_RCDATA_MASK</c>: when no masked bit is set,
 /// <c>AppendStrBufToLongStrBuf()</c> is called; otherwise
 /// <c>EmitStrBuf()</c> is called.
 /// </summary>
 /// <param name="returnState">state whose masked bits select the action</param>
 void EmitOrAppendStrBuf(InterLexerState returnState)
 {
     // An earlier revision of this check used the opposite comparison
     // (append when the masked value was non-zero).
     bool anyMaskedBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;
     if (anyMaskedBitSet)
     {
         EmitStrBuf();
         return;
     }

     AppendStrBufToLongStrBuf();
 }
示例#2
0
        /// <summary>
        /// Currently a no-op: the body is empty, so
        /// <paramref name="interLexerState"/> is neither stored nor inspected.
        /// NOTE(review): presumably meant to record the tokenizer's state
        /// (inferred from the name only) — confirm the intended behavior.
        /// </summary>
        /// <param name="interLexerState">state supplied by the caller; ignored here</param>
        protected void SetInterLexerState(InterLexerState interLexerState)
        {

        }
示例#3
0
 /// <summary>
 /// Sets the tokenizer state together with the associated element name.
 /// This should only ever be used to put the tokenizer into one of the
 /// states that have a special end tag expectation.
 /// </summary>
 /// <param name="specialTokenizerState">the tokenizer state to set</param>
 /// <param name="endTagExpectation">
 /// the expected end tag for transitioning back to normal
 /// </param>
 public void SetStateAndEndTagExpectation(InterLexerState specialTokenizerState,
         ElementName endTagExpectation)
 {
     stateSave = specialTokenizerState;

     // The parameter shadows the field, hence the explicit qualifier.
     this.endTagExpectation = endTagExpectation;

     // Invoked only after the field above has been assigned.
     EndTagExpectationToArray();
 }
示例#4
0
        // ]NOCPP]

        // For the token handler to call
        /// <summary>
        /// Sets the tokenizer state together with the associated element name,
        /// given as a string. This should only ever be used to put the
        /// tokenizer into one of the states that have a special end tag
        /// expectation.
        /// </summary>
        /// <remarks>
        /// When <paramref name="specialTokenizerState"/> is
        /// <c>InterLexerState.s01_DATA_i</c> only the saved state is updated;
        /// <paramref name="endTagExpectation"/> is not read in that case.
        /// </remarks>
        /// <param name="specialTokenizerState">the tokenizer state to set</param>
        /// <param name="endTagExpectation">
        /// the expected end tag for transitioning back to normal
        /// </param>
        public void SetStateAndEndTagExpectation(InterLexerState specialTokenizerState,
                [Local] String endTagExpectation)
        {
            stateSave = specialTokenizerState;

            if (specialTokenizerState != InterLexerState.s01_DATA_i)
            {
                // Look the element name up from the characters of the string.
                char[] nameChars = endTagExpectation.ToCharArray();
                this.endTagExpectation = ElementName.ElementNameByBuffer(nameChars);
                EndTagExpectationToArray();
            }
        }
示例#5
0
 /// <summary>
 /// Puts the tokenizer back into the data state
 /// (<c>InterLexerState.s01_DATA_i</c>): allocates fresh string buffers,
 /// restores the scalar bookkeeping fields to their initial values,
 /// re-initializes the doctype fields and drops the references to the
 /// current tag name, attribute name and (when a new attribute holder is
 /// created each time) the attributes.
 /// </summary>
 public void ResetToDataState()
 {
     // Fresh, empty text buffers.
     strBuffer = new StringBuilder();
     longStrBuffer = new StringBuilder();

     // Saved state and scalar bookkeeping.
     stateSave = InterLexerState.s01_DATA_i;
     // line = 1; XXX line numbers
     lastCR = false;
     index = 0;
     forceQuirks = false;
     additional = '\u0000';

     entCol = -1;
     firstCharKey = -1;
     lo = 0;
     hi = 0; // always overwritten before it is used
     candidate = -1;
     strBufMark = 0;

     prevValue = -1;
     value = 0;
     seenDigits = false;
     endTag = false;

     // shouldSuspend is deliberately NOT cleared here (reset removed by
     // J. Treworgy, 12/7/2012): it should remain true so the parser can
     // choose to abort.
     //shouldSuspend = false;

     InitDoctypeFields();

     // Release references held from the previous run.
     if (tagName != null)
     {
         tagName = null;
     }
     if (attributeName != null)
     {
         attributeName = null;
     }
     // [NOCPP[
     if (newAttributesEachTime && attributes != null)
     {
         // ]NOCPP]
         attributes = null;
         // [NOCPP[
     }
     // ]NOCPP]
 }
示例#6
0
 /// <summary>
 /// Turns the numeric character reference value held in <c>value</c> into
 /// one or two UTF-16 code units and hands them to
 /// <c>EmitOrAppendOne</c> / <c>EmitOrAppendTwo</c>, reporting the
 /// applicable error for problematic values first.
 /// </summary>
 /// <param name="returnState">
 /// state forwarded unchanged to the emit-or-append helpers
 /// </param>
 void HandleNcrValue(InterLexerState returnState)
 {
     /*
      * The characters matched so far have been interpreted as a number
      * (hexadecimal or decimal as appropriate); that number is in `value`.
      */

     // Beyond the last Unicode code point: error + replacement character.
     if (value > 0x10FFFF)
     {
         ErrNcrOutOfRange();
         EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
         return;
     }

     // Above 0xFFFF: needs a surrogate pair.
     if (value > 0xFFFF)
     {
         // [NOCPP[
         MaybeWarnPrivateUseAstral();
         if ((value & 0xFFFE) == 0xFFFE)
         {
             ErrAstralNonCharacter(value);
         }
         // ]NOCPP]
         astralChar[0] = (char)(LEAD_OFFSET + (value >> 10));
         astralChar[1] = (char)(0xDC00 + (value & 0x3FF));
         EmitOrAppendTwo(astralChar, returnState);
         return;
     }

     // ---- From here on, value <= 0xFFFF ----

     if (value >= 0x80 && value <= 0x9f)
     {
         /*
          * The number is in the first column of the replacement table:
          * this is a parse error, and the character emitted is the one
          * in the matching row of NamedCharacters.WINDOWS_1252.
          */
         ErrNcrInC1Range();
         EmitOrAppendOne(NamedCharacters.WINDOWS_1252[value - 0x80], returnState);
         return;
     }

     // [NOCPP[
     if (value == 0xC && contentSpacePolicy != XmlViolationPolicy.Allow)
     {
         // Form feed while the content-space policy is not Allow.
         if (contentSpacePolicy == XmlViolationPolicy.AlterInfoset)
         {
             EmitOrAppendOne(SPACE, returnState);
         }
         else if (contentSpacePolicy == XmlViolationPolicy.Fatal)
         {
             Fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
         }
         return;
     }
     // ]NOCPP]

     if (value == 0x0)
     {
         ErrNcrZero();
         EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
         return;
     }

     // 0xD800..0xDFFF: a lone surrogate is replaced.
     if ((value & 0xF800) == 0xD800)
     {
         ErrNcrSurrogate();
         EmitOrAppendOne(Tokenizer2.REPLACEMENT_CHARACTER, returnState);
         return;
     }

     /*
      * Otherwise the character token is the Unicode character whose code
      * point is that number; some values are reported (and possibly
      * substituted by the error hook) before being passed on.
      */
     char ch = (char)value;
     // [NOCPP[
     bool reportedLowControl = value <= 0x0008
             || value == 0x000B
             || (value >= 0x000E && value <= 0x001F);

     if (value == 0x0D)
     {
         ErrNcrCr();
     }
     else if (reportedLowControl)
     {
         ch = ErrNcrControlChar(ch);
     }
     else if (value >= 0xFDD0 && value <= 0xFDEF)
     {
         ErrNcrUnassigned();
     }
     else if ((value & 0xFFFE) == 0xFFFE)
     {
         ch = ErrNcrNonCharacter(ch);
     }
     else if (value >= 0x007F && value <= 0x009F)
     {
         // 0x80..0x9F already returned above, so only 0x7F arrives here.
         ErrNcrControlChar();
     }
     else
     {
         MaybeWarnPrivateUse(ch);
     }
     // ]NOCPP]
     bmpChar[0] = ch;
     EmitOrAppendOne(bmpChar, returnState);
 }
示例#7
0
 /// <summary>
 /// Delivers the first character of <paramref name="val"/>. The state value
 /// is cast to a byte and tested against <c>DATA_AND_RCDATA_MASK</c>: when
 /// no masked bit is set the character goes to <c>AppendLongStrBuf</c>;
 /// otherwise it is passed to <c>TokenListener.Characters</c>.
 /// </summary>
 /// <param name="val">array whose element 0 is the character to deliver</param>
 /// <param name="returnState">state whose masked bits select the destination</param>
 void EmitOrAppendOne(char[] val, InterLexerState returnState)
 {
     bool anyMaskedBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;
     if (anyMaskedBitSet)
     {
         TokenListener.Characters(val, 0, 1);
     }
     else
     {
         AppendLongStrBuf(val[0]);
     }
 }
示例#8
0
 /// <summary>
 /// Delivers the first two characters of <paramref name="val"/>. The state
 /// value is cast to a byte and tested against <c>DATA_AND_RCDATA_MASK</c>:
 /// when no masked bit is set both characters go, in order, to
 /// <c>AppendLongStrBuf</c>; otherwise they are passed together to
 /// <c>TokenListener.Characters</c>.
 /// </summary>
 /// <param name="val">array whose elements 0 and 1 are the characters to deliver</param>
 /// <param name="returnState">state whose masked bits select the destination</param>
 void EmitOrAppendTwo(char[] val, InterLexerState returnState)
 {
     // TODO: still to be reviewed — should the mask comparison be "== 0" or
     // "!= 0"? An earlier revision appended when the masked value was
     // non-zero; the current code appends when it is zero.
     bool anyMaskedBitSet = ((byte)returnState & DATA_AND_RCDATA_MASK) != 0;
     if (anyMaskedBitSet)
     {
         TokenListener.Characters(val, 0, 2);
         return;
     }

     AppendLongStrBuf(val[0]);
     AppendLongStrBuf(val[1]);
 }
        /// <summary>
        /// Tokenizer state loop. Only the data state
        /// (<c>InterLexerState.s01_DATA_i</c>) is handled here: characters are
        /// pulled from <c>reader</c> one at a time and dispatched on their value.
        /// </summary>
        /// <remarks>
        /// The method returns when the reader has no more characters or right
        /// after a carriage return has been emitted. Reaching a state that has
        /// no <c>case</c> in this loop throws instead of spinning forever.
        /// </remarks>
        /// <param name="state">state to start in</param>
        /// <param name="returnState">
        /// overwritten with the current state when '&amp;' is seen; it is a
        /// by-value parameter and is not read anywhere in this method
        /// </param>
        /// <exception cref="System.NotImplementedException">
        /// the loop reached a state for which it has no handler
        /// </exception>
        void StateLoop3(InterLexerState state, InterLexerState returnState)
        {
            var subLexerTagAndAttr = new SubLexerTagAndAttr();
            // NOTE(review): the sub-lexers below are created but not used yet
            // by any state handled in this method.
            var subLexerComment = new SubLexerComment();
            var subLexerScriptData = new SubLexerScriptData();
            var subLexerNCR = new SubLexerNCR();
            var subLexerDocType = new SubLexerDocType();
            var subLexerRawText = new SubLexerCData();


            for (; ; )
            {
            //*************
            continueStateloop:
                //************* 
                switch (state)
                {
                    case InterLexerState.s01_DATA_i:
                        /*dataloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {
                                switch (c)
                                {
                                    case '&':
                                        /*
                                         * U+0026 AMPERSAND (&) Switch to the character
                                         * reference in data state.
                                         */
                                        FlushChars();
                                        ClearStrBufAndAppend(c);
                                        SetAdditionalAndRememberAmpersandLocation('\u0000');
                                        returnState = state;
                                        //state = Transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);

                                        state = InterLexerState.CONSUME_CHARACTER_REFERENCE_i;
                                        goto continueStateloop;
                                    case '<':
                                        /*
                                         * U+003C LESS-THAN SIGN (<) Switch to the tag
                                         * open state.
                                         */
                                        FlushChars();

                                        //state = Transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
                                        //**************
                                        subLexerTagAndAttr.StateLoop3_Tag(SubLexerTagState.s08_TAG_OPEN_p, SubLexerTagState.s08_TAG_OPEN_p);
                                        state = subLexerTagAndAttr.OutputState;
                                        //**************

                                        // NOTE(review): this break only leaves the inner
                                        // switch, so reading continues in this data-state
                                        // loop and the state assigned above is not
                                        // re-dispatched. Confirm that is intended.
                                        break;
                                    // stateloop;
                                    case '\u0000':
                                        EmitReplacementCharacter();
                                        continue;
                                    case '\r':
                                        EmitCarriageReturn();
                                        return;
                                    case '\n':
                                    default:
                                        /*
                                         * Anything else Emit the input character as a
                                         * character token.
                                         * 
                                         * Stay in the data state.
                                         */
                                        continue;
                                }
                            }

                            // The reader has no more characters. Leave the state
                            // loop here: falling back into the enclosing for(;;)
                            // would re-enter this case and busy-loop forever
                            // without consuming anything.
                            return;
                        }
                    default:
                        // No handler for this state (for example the character
                        // reference state selected by '&' above). Without this
                        // branch the enclosing for(;;) would spin forever because
                        // nothing changes `state`; fail fast instead.
                        throw new System.NotImplementedException(
                            "StateLoop3 has no handler for tokenizer state " + state);
                }
            }
        }