예제 #1
0
 /// <summary>
 /// After a lexical error, tries to match a terminal further ahead in the input using the lexer's DFA
 /// </summary>
 /// <param name="originIndex">The index in the input text at which matching starts</param>
 /// <returns>The token match produced for the erroneous input</returns>
 internal TokenMatch RunDFAOnError(int originIndex)
 {
     // recovery is disabled: report the offending character and return a match built with a length of 1
     if (RecoveryDistance <= 0)
     {
         OnError(new UnexpectedCharError(text.GetValue(originIndex).ToString(), text.GetPositionAt(originIndex)));
         return new TokenMatch(1);
     }

     // position of the separator terminal within the terminals, or -1 when there is none
     int separatorIndex = -1;
     for (int candidate = 0; candidate < symTerminals.Count && separatorIndex < 0; candidate++)
     {
         if (symTerminals[candidate].ID == separatorID)
         {
             separatorIndex = candidate;
         }
     }

     // delegate the actual recovery to a fuzzy matcher bounded by the recovery distance
     FuzzyMatcher matcher = new FuzzyMatcher(automaton, separatorIndex, text, OnError, RecoveryDistance, originIndex);
     return matcher.Run();
 }
예제 #2
0
        /// <summary>
        /// Reports on the lexical error at the specified index
        /// </summary>
        /// <remarks>
        /// The kind of error that is reported depends on what is found at the index:
        /// an unexpected end of input, an unexpected character (a well-formed surrogate pair is reported
        /// as a single two-unit value), or an incorrect UTF-16 sequence when a surrogate is unpaired.
        /// An unpaired surrogate is always reported as an encoding error, including a high surrogate
        /// that is the very last character of the input and a low surrogate that is the very first one.
        /// </remarks>
        /// <param name="index">The index in the input where the error occurs</param>
        private void OnError(int index)
        {
            ParseErrorType errorType = ParseErrorType.UnexpectedChar;
            string         value     = "";

            if (text.IsEnd(index))
            {
                // the end of input was not expected
                // there is necessarily some input before because an empty input would have matched the $
                if (char.IsHighSurrogate(text.GetValue(index - 1)))
                {
                    // a trailing UTF-16 high surrogate: report the error at the surrogate itself
                    index--;
                    errorType = ParseErrorType.IncorrectUTF16NoLowSurrogate;
                }
                else
                {
                    errorType = ParseErrorType.UnexpectedEndOfInput;
                }
            }
            else
            {
                char c = text.GetValue(index);
                value = c.ToString();
                if (char.IsHighSurrogate(c))
                {
                    // a UTF-16 high surrogate is only valid when immediately followed by a low surrogate
                    // the end-of-input check comes first so that the next character is never read past the end
                    if (!text.IsEnd(index + 1) && char.IsLowSurrogate(text.GetValue(index + 1)))
                    {
                        // well-formed pair: report both code units as the unexpected character
                        value = new string(new [] { c, text.GetValue(index + 1) });
                    }
                    else
                    {
                        // followed by something else, or by the end of the input
                        errorType = ParseErrorType.IncorrectUTF16NoLowSurrogate;
                    }
                }
                else if (char.IsLowSurrogate(c))
                {
                    // a UTF-16 low surrogate is only valid when immediately preceded by a high surrogate
                    // the index check comes first so that nothing is read before the start of the input
                    if (index > 0 && char.IsHighSurrogate(text.GetValue(index - 1)))
                    {
                        // well-formed pair: report it from the position of its high surrogate
                        char high = text.GetValue(index - 1);
                        index--;
                        value = new string(new [] { high, c });
                    }
                    else
                    {
                        // preceded by something else, or located at the very start of the input
                        errorType = ParseErrorType.IncorrectUTF16NoHighSurrogate;
                    }
                }
            }

            switch (errorType)
            {
            case ParseErrorType.UnexpectedEndOfInput:
                errors(new UnexpectedEndOfInput(text.GetPositionAt(index)));
                break;

            case ParseErrorType.UnexpectedChar:
                errors(new UnexpectedCharError(value, text.GetPositionAt(index)));
                break;

            case ParseErrorType.IncorrectUTF16NoHighSurrogate:
            case ParseErrorType.IncorrectUTF16NoLowSurrogate:
                // index designates the offending surrogate in both cases
                errors(new IncorrectEncodingSequence(text.GetPositionAt(index), text.GetValue(index), errorType));
                break;

            default:
                break;
            }
        }