예제 #1
0
 void EmitOrAppendTwo(char[] val, NCRState returnState)
 {
     //TODO: review here=>   use != or == ?
     //if ((returnState & DATA_AND_RCDATA_MASK) != 0)
     if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
     {
         AppendLongStrBuf(val[0]);
         AppendLongStrBuf(val[1]);
     }
     else
     {
         TokenListener.Characters(val, 0, 2);
     }
 }
예제 #2
0
 public void StateSelection(string state)
 {
     State.Click();
     if (state == "NCR")
     {
         NCRState.Click();
     }
     else if (state == "Uttar Pradesh")
     {
         UttarPradeshState.Click();
     }
     else if (state == "Haryana")
     {
         HaryanaState.Click();
     }
     else if (state == "Rajasthan")
     {
         RajasthanState.Click();
     }
     else
     {
         throw new Exception("State must be NCR/Uttar Pradesh/Haryana/Rajasthan");
     }
 }
예제 #3
0
 void EmitOrAppendOne(char[] val, NCRState returnState)
 {
     if (((byte)returnState & DATA_AND_RCDATA_MASK) == 0)
     {
         AppendLongStrBuf(val[0]);
     }
     else
     {
         TokenListener.Characters(val, 0, 1);
     }
 }
예제 #4
0
 void EmitOrAppendStrBuf(NCRState returnState)
 {
     throw new NotSupportedException();
 }
예제 #5
0
        void StateLoop3_NCR(NCRState state, NCRState returnState)
        {

            /*
             * Idioms used in this code:
             * 
             * 
             * Consuming the next input character
             * 
             * To consume the next input character, the code does this: if (++pos ==
             * endPos) { goto breakStateloop; } c = buf[pos];
             * 
             * 
             * Staying in a state
             * 
             * When there's a state that the tokenizer may stay in over multiple
             * input characters, the state has a wrapper |for(;;)| loop and staying
             * in the state continues the loop.
             * 
             * 
             * Switching to another state
             * 
             * To switch to another state, the code sets the state variable to the
             * magic number of the new state. Then it either continues stateloop or
             * breaks out of the state's own wrapper loop if the target state is
             * right after the current state in source order. (This is a partial
             * workaround for Java's lack of goto.)
             * 
             * 
             * Reconsume support
             * 
             * The spec sometimes says that an input character is reconsumed in
             * another state. If a state can ever be entered so that an input
             * character can be reconsumed in it, the state's code starts with an
             * |if (reconsume)| that sets reconsume to false and skips over the
             * normal code for consuming a new character.
             * 
             * To reconsume the current character in another state, the code sets
             * |reconsume| to true and then switches to the other state.
             * 
             * 
             * Emitting character tokens
             * 
             * This method emits character tokens lazily. Whenever a new range of
             * character tokens starts, the field cstart must be set to the start
             * index of the range. The flushChars() method must be called at the end
             * of a range to flush it.
             * 
             * 
             * U+0000 handling
             * 
             * The various states have to handle the replacement of U+0000 with
             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
             * replacement doesn't need to happen, because it's handled by the
             * reconsuming state.
             * 
             * 
             * LF handling
             * 
             * Every state needs to increment the line number upon LF unless the LF
             * gets reconsumed by another state which increments the line number.
             * 
             * 
             * CR handling
             * 
             * Every state needs to handle CR unless the CR gets reconsumed and is
             * handled by the reconsuming state. The CR needs to be handled as if it
             * were and LF, the lastCR field must be set to true and then this
             * method must return. The IO driver will then swallow the next
             * character if it is an LF to coalesce CRLF.
             */

            /*
             * As there is no support for labeled loops in C#, instead of break <loop>;
             * the port uses goto break<loop>; and a label after the loop.
             * Instead of continue <loop>; it uses goto continue<loop>; and a label
             * at the beginning or end of the loop (which doesn't matter in for(;;) loops)
             */


            /*stateloop:*/
            for (; ; )
            {

                //*************
            continueStateloop:
                //*************

                switch (state)
                {
                    // XXX reorder point
                    case (NCRState)InterLexerState.CONSUME_NCR_i:
                        {
                            char c;
                            if (!reader.ReadNext(out c))
                            {
                                //------------------------------------
                                //eof
                                goto breakStateloop;
                            }

                            prevValue = -1;
                            value = 0;
                            seenDigits = false;
                            /*
                             * The behavior further depends on the character after the
                             * U+0023 NUMBER SIGN:
                             */
                            switch (c)
                            {
                                case 'x':
                                case 'X':

                                    /*
                                     * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
                                     * LETTER X Consume the X.
                                     * 
                                     * Follow the steps below, but using the range of
                                     * characters U+0030 DIGIT ZERO through to U+0039
                                     * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
                                     * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
                                     * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
                                     * LETTER F (in other words, 0-9, A-F, a-f).
                                     * 
                                     * When it comes to interpreting the number,
                                     * interpret it as a hexadecimal number.
                                     */
                                    AppendStrBuf(c);
                                    //state = Transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
                                    state = NCRState.HEX_NCR_LOOP_p;
                                    goto continueStateloop;
                                default:
                                    /*
                                     * Anything else Follow the steps below, but using
                                     * the range of characters U+0030 DIGIT ZERO through
                                     * to U+0039 DIGIT NINE (i.e. just 0-9).
                                     * 
                                     * When it comes to interpreting the number,
                                     * interpret it as a decimal number.
                                     */
                                    //state = Transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
                                    state = NCRState.DECIMAL_NRC_LOOP_p;
                                    //reconsume = true;
                                    reader.StepBack();
                                    // FALL THROUGH goto continueStateloop;
                                    break;
                            }
                            //------------------------------------
                            // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                            goto case NCRState.DECIMAL_NRC_LOOP_p;
                        }
                    case NCRState.DECIMAL_NRC_LOOP_p:
                        /*decimalloop:*/
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                // Deal with overflow gracefully
                                if (value < prevValue)
                                {
                                    value = 0x110000; // Value above Unicode range but
                                    // within int
                                    // range
                                }
                                prevValue = value;
                                /*
                                 * Consume as many characters as match the range of
                                 * characters given above.
                                 */
                                if (c >= '0' && c <= '9')
                                {
                                    seenDigits = true;
                                    value *= 10;
                                    value += c - '0';
                                    continue;
                                }
                                else if (c == ';')
                                {
                                    if (seenDigits)
                                    {
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.SkipOneAndStartCollect();
                                        }
                                        //state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                        state = NCRState.HANDLE_NCR_VALUE_p;

                                        // FALL THROUGH goto continueStateloop;
                                        goto breakDecimalloop;
                                    }
                                    else
                                    {
                                        ErrNoDigitsInNCR();
                                        AppendStrBuf(';');
                                        EmitOrAppendStrBuf(returnState);
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.SkipOneAndStartCollect();
                                        }
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;

                                        goto continueStateloop;
                                    }
                                }
                                else
                                {
                                    /*
                                     * If no characters match the range, then don't
                                     * consume any characters (and unconsume the U+0023
                                     * NUMBER SIGN character and, if appropriate, the X
                                     * character). This is a parse error; nothing is
                                     * returned.
                                     * 
                                     * Otherwise, if the next character is a U+003B
                                     * SEMICOLON, consume that too. If it isn't, there
                                     * is a parse error.
                                     */
                                    if (!seenDigits)
                                    {
                                        ErrNoDigitsInNCR();
                                        EmitOrAppendStrBuf(returnState);
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.StartCollect();
                                        }
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    else
                                    {
                                        ErrCharRefLacksSemicolon();
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.StartCollect();
                                        }
                                        //state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                        state = NCRState.HANDLE_NCR_VALUE_p;
                                        //reconsume = true;
                                        reader.StepBack();
                                        // FALL THROUGH goto continueStateloop;
                                        goto breakDecimalloop;
                                    }
                                }
                            }

                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        //-------------------------------------
                        breakDecimalloop:
                            goto case NCRState.HANDLE_NCR_VALUE_p;
                        }
                    // WARNING FALLTHRU case TokenizerState.TRANSITION: DON'T REORDER
                    case NCRState.HANDLE_NCR_VALUE_p:
                        {
                            // WARNING previous state sets reconsume
                            // XXX inline this case TokenizerState.if the method size can take it
                            HandleNcrValue(returnState);
                            //state = Transition(state, returnState, reconsume, pos);
                            state = returnState;
                            goto continueStateloop;
                        }
                    // XXX reorder point
                    case NCRState.HEX_NCR_LOOP_p:
                        {
                            char c;
                            while (reader.ReadNext(out c))
                            {

                                // Deal with overflow gracefully
                                if (value < prevValue)
                                {
                                    value = 0x110000; // Value above Unicode range but
                                    // within int
                                    // range
                                }
                                prevValue = value;
                                /*
                                 * Consume as many characters as match the range of
                                 * characters given above.
                                 */
                                if (c >= '0' && c <= '9')
                                {
                                    seenDigits = true;
                                    value *= 16;
                                    value += c - '0';
                                    continue;
                                }
                                else if (c >= 'A' && c <= 'F')
                                {
                                    seenDigits = true;
                                    value *= 16;
                                    value += c - 'A' + 10;
                                    continue;
                                }
                                else if (c >= 'a' && c <= 'f')
                                {
                                    seenDigits = true;
                                    value *= 16;
                                    value += c - 'a' + 10;
                                    continue;
                                }
                                else if (c == ';')
                                {
                                    if (seenDigits)
                                    {
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.SkipOneAndStartCollect();
                                        }
                                        //state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                        state = NCRState.HANDLE_NCR_VALUE_p;
                                        goto continueStateloop;
                                    }
                                    else
                                    {
                                        ErrNoDigitsInNCR();
                                        AppendStrBuf(';');
                                        EmitOrAppendStrBuf(returnState);
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.SkipOneAndStartCollect();
                                        }
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        goto continueStateloop;
                                    }
                                }
                                else
                                {
                                    /*
                                     * If no characters match the range, then don't
                                     * consume any characters (and unconsume the U+0023
                                     * NUMBER SIGN character and, if appropriate, the X
                                     * character). This is a parse error; nothing is
                                     * returned.
                                     * 
                                     * Otherwise, if the next character is a U+003B
                                     * SEMICOLON, consume that too. If it isn't, there
                                     * is a parse error.
                                     */
                                    if (!seenDigits)
                                    {
                                        ErrNoDigitsInNCR();
                                        EmitOrAppendStrBuf(returnState);
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.StartCollect();
                                        }
                                        //state = Transition(state, returnState, reconsume, pos);
                                        state = returnState;
                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                    else
                                    {
                                        ErrCharRefLacksSemicolon();
                                        //if ((returnState & DATA_AND_RCDATA_MASK) == 0)
                                        if (((byte)returnState & DATA_AND_RCDATA_MASK) != 0)
                                        {
                                            reader.StartCollect();
                                        }
                                        //state = Transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                        state = NCRState.HANDLE_NCR_VALUE_p;

                                        //reconsume = true;
                                        reader.StepBack();
                                        goto continueStateloop;
                                    }
                                }
                            }
                            //------------------------------------
                            //eof
                            goto breakStateloop;
                        }

                    // END HOTSPOT WORKAROUND
                }
            } // stateloop

        breakStateloop:
            //FlushChars(buf, pos);
            FlushChars();
            /*
             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
             */
            // Save locals
            //stateSave = state;
            //returnStateSave = returnState;
            SaveStates(state, returnState);
        }
예제 #6
0
        void SaveStates(NCRState state, NCRState returnState)
        {

        }
예제 #7
0
 void HandleNcrValue(NCRState returnState)
 {
     /*
      * If one or more characters match the range, then take them all and
      * interpret the string of characters as a number (either hexadecimal or
      * decimal as appropriate).
      */
     if (value <= 0xFFFF)
     {
         if (value >= 0x80 && value <= 0x9f)
         {
             /*
              * If that number is one of the numbers in the first column of
              * the following table, then this is a parse error.
              */
             ErrNcrInC1Range();
             /*
              * Find the row with that number in the first column, and return
              * a character token for the Unicode character given in the
              * second column of that row.
              */
             char[] val = NamedCharacters.WINDOWS_1252[value - 0x80];
             EmitOrAppendOne(val, returnState);
             // [NOCPP[
         }
         else if (value == 0xC
               && contentSpacePolicy != XmlViolationPolicy.Allow)
         {
             if (contentSpacePolicy == XmlViolationPolicy.AlterInfoset)
             {
                 EmitOrAppendOne(SPACE, returnState);
             }
             else if (contentSpacePolicy == XmlViolationPolicy.Fatal)
             {
                 Fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
             }
             // ]NOCPP]
         }
         else if (value == 0x0)
         {
             ErrNcrZero();
             EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
         }
         else if ((value & 0xF800) == 0xD800)
         {
             ErrNcrSurrogate();
             EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
         }
         else
         {
             /*
              * Otherwise, return a character token for the Unicode character
              * whose code point is that number.
              */
             char ch = (char)value;
             // [NOCPP[
             if (value == 0x0D)
             {
                 ErrNcrCr();
             }
             else if ((value <= 0x0008) || (value == 0x000B)
                   || (value >= 0x000E && value <= 0x001F))
             {
                 ch = ErrNcrControlChar(ch);
             }
             else if (value >= 0xFDD0 && value <= 0xFDEF)
             {
                 ErrNcrUnassigned();
             }
             else if ((value & 0xFFFE) == 0xFFFE)
             {
                 ch = ErrNcrNonCharacter(ch);
             }
             else if (value >= 0x007F && value <= 0x009F)
             {
                 ErrNcrControlChar();
             }
             else
             {
                 MaybeWarnPrivateUse(ch);
             }
             // ]NOCPP]
             bmpChar[0] = ch;
             EmitOrAppendOne(bmpChar, returnState);
         }
     }
     else if (value <= 0x10FFFF)
     {
         // [NOCPP[
         MaybeWarnPrivateUseAstral();
         if ((value & 0xFFFE) == 0xFFFE)
         {
             ErrAstralNonCharacter(value);
         }
         // ]NOCPP]
         astralChar[0] = (char)(LEAD_OFFSET + (value >> 10));
         astralChar[1] = (char)(0xDC00 + (value & 0x3FF));
         EmitOrAppendTwo(astralChar, returnState);
     }
     else
     {
         ErrNcrOutOfRange();
         EmitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
     }
 }