/// <summary>
        /// Builds the tokenizer that corresponds to the requested grammar.
        /// </summary>
        /// <param name="gs">Selector of the grammar whose tokenizer is wanted.</param>
        /// <returns>A newly constructed tokenizer for <paramref name="gs"/>.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="gs"/> is not one of the selectors handled below.
        /// </exception>
        public static ITokenizer Create(GrammarSelector gs)
        {
            switch (gs)
            {
            // Grammars served by the Tokenizer class.
            case GrammarSelector.Micro:
            case GrammarSelector.Inference:
                return new Tokenizer(gs);

            // Grammars served by the interpreter tokenizer.
            case GrammarSelector.InterpreterChapter1:
            case GrammarSelector.LISP:
            case GrammarSelector.APL:
            case GrammarSelector.Scheme:
            case GrammarSelector.SASL:
            case GrammarSelector.CLU:
            case GrammarSelector.Smalltalk:
            case GrammarSelector.Prolog:
            case GrammarSelector.JSON:
                return new Inference.Interpreter.InterpreterTokenizer(gs);

            // Prolog2 has a dedicated tokenizer unless DEAD_CODE is defined at compile time.
            case GrammarSelector.Prolog2:
#if DEAD_CODE
                return new Inference.Interpreter.InterpreterTokenizer(gs);
#else
                return new Prolog2Tokenizer();
#endif

            default:
                throw new ArgumentException("TokenizerFactory.Create() : Unrecognized GrammarSelector: " + gs.ToString(), "gs");
            }
        }
        /// <summary>
        /// Registers the accepted token types and the state transitions that this
        /// constructor installs for every grammar.
        /// </summary>
        /// <param name="gs">Grammar selector; stored in the <c>gs</c> field.</param>
        public FSMTokenizer(GrammarSelector gs)
        {
            this.gs = gs;

            // Register the delimiter and the completed token type for the open-string-literal state.
            dictInternalStringStateToDelimiter[TokenType.S_StrLitOpen] = cStringDelimiter;
            dictInternalStringStateToCompletedState[TokenType.S_StrLitOpen] = TokenType.T_StrLit;

            // T_LessEqual is deliberately left out here: in Prolog, it's =<, not <=.
            var acceptedTokenTypes = new[]
            {
                TokenType.T_IntLit,
                TokenType.T_FltLit,
                TokenType.T_StrLit,
                TokenType.T_Ident,
                TokenType.T_Mult,
                TokenType.T_Div,
                TokenType.T_Plus,
                TokenType.T_Minus,
                TokenType.T_Equal,
                TokenType.T_NotEqual,
                TokenType.T_Less,
                TokenType.T_Greater,
                TokenType.T_GreaterEqual,
                TokenType.T_Semicolon,
                TokenType.T_Comma,
                TokenType.T_LeftBracket,
                TokenType.T_RightBracket,
                TokenType.T_Arrow,
                TokenType.T_EOF
            };

            foreach (var tokenType in acceptedTokenTypes)
            {
                acceptableTokens.Add(tokenType);
            }

            // Transitions out of the start state.
            // NOTE(review): 'A' and '0' presumably stand for "any letter" and "any digit" - confirm against AddTransition.
            var start = TokenType.S_Start;

            AddTransition(start, 'A', TokenType.T_Ident);
            AddTransition(start, '0', TokenType.T_IntLit);
            AddTransition(start, cStringDelimiter, TokenType.S_StrLitOpen);
            AddTransition(start, '*', TokenType.T_Mult);
            AddTransition(start, '/', TokenType.T_Div);
            AddTransition(start, '+', TokenType.T_Plus);
            AddTransition(start, '-', TokenType.T_Minus);
            AddTransition(start, '=', TokenType.T_Equal);
            AddTransition(start, '<', TokenType.T_Less);
            AddTransition(start, '>', TokenType.T_Greater);
            AddTransition(start, ':', TokenType.T_Colon);
            AddTransition(start, ';', TokenType.T_Semicolon);
            AddTransition(start, ',', TokenType.T_Comma);
            AddTransition(start, '|', TokenType.T_OrBar);
            AddTransition(start, '(', TokenType.T_LeftBracket);
            AddTransition(start, ')', TokenType.T_RightBracket);

            // An identifier stays an identifier on 'A', '0' or '_'.
            foreach (var identifierChar in new[] { 'A', '0', '_' })
            {
                AddTransition(TokenType.T_Ident, identifierChar, TokenType.T_Ident);
            }

            // Integer literal, optionally continuing through '.' into a float literal.
            AddTransition(TokenType.T_IntLit, '0', TokenType.T_IntLit);
            AddTransition(TokenType.T_IntLit, '.', TokenType.S_IntLitDot);
            AddTransition(TokenType.S_IntLitDot, '0', TokenType.T_FltLit);
            AddTransition(TokenType.T_FltLit, '0', TokenType.T_FltLit);

            // The delimiter closes an open string literal; a delimiter right after a
            // completed literal moves back into the open state.
            AddTransition(TokenType.S_StrLitOpen, cStringDelimiter, TokenType.T_StrLit);
            AddTransition(TokenType.T_StrLit, cStringDelimiter, TokenType.S_StrLitOpen);

            // Two-character continuations: "-0" (integer literal), "->" and ">=".
            AddTransition(TokenType.T_Minus, '0', TokenType.T_IntLit);
            AddTransition(TokenType.T_Minus, '>', TokenType.T_Arrow);
            AddTransition(TokenType.T_Greater, '=', TokenType.T_GreaterEqual);
        }
        /// <summary>
        /// Adds the assignment token on top of what the base constructor registers and,
        /// for the Inference grammar only, a further set of tokens and transitions.
        /// </summary>
        /// <param name="gs">Grammar selector; also forwarded to the base constructor.</param>
        public Tokenizer(GrammarSelector gs)
            : base(gs)
        {
            // := is a Micro token.
            acceptableTokens.Add(TokenType.T_Assign);
            AddTransition(TokenType.T_Colon, '=', TokenType.T_Assign);

            // Everything below applies to the Inference grammar only.
            if (gs != GrammarSelector.Inference)
            {
                return;
            }

            var inferenceTokenTypes = new[]
            {
                TokenType.T_BoolIdent,
                TokenType.T_SkolemIdent,
                TokenType.T_2OrBar,
                TokenType.T_2Ampersand,
                TokenType.T_Exclamation,
                TokenType.T_Variable,
                TokenType.T_LessEqual
            };

            foreach (var tokenType in inferenceTokenTypes)
            {
                acceptableTokens.Add(tokenType);
            }

            // Characters on which an identifier-like token loops back to itself.
            // NOTE(review): 'A' and '0' presumably stand for "any letter" and "any digit" - confirm against AddTransition.
            char[] identifierChars = { 'A', '0', '_' };

            AddTransition(TokenType.S_Start, '&', TokenType.S_Ampersand);
            AddTransition(TokenType.S_Start, '?', TokenType.S_Question);
            AddTransition(TokenType.S_Start, '!', TokenType.T_Exclamation);
            AddTransition(TokenType.S_Start, '$', TokenType.S_Dollar);

            // '$' followed by 'A' starts a Skolem identifier.
            AddTransition(TokenType.S_Dollar, 'A', TokenType.T_SkolemIdent);

            foreach (var identifierChar in identifierChars)
            {
                AddTransition(TokenType.T_SkolemIdent, identifierChar, TokenType.T_SkolemIdent);
            }

            // "<=" is accepted for this grammar.
            AddTransition(TokenType.T_Less, '=', TokenType.T_LessEqual);

            // '?' followed by 'A' starts a variable.
            AddTransition(TokenType.S_Question, 'A', TokenType.T_Variable);

            foreach (var identifierChar in identifierChars)
            {
                AddTransition(TokenType.T_Variable, identifierChar, TokenType.T_Variable);
            }

            // "||" and "&&".
            AddTransition(TokenType.T_OrBar, '|', TokenType.T_2OrBar);
            AddTransition(TokenType.S_Ampersand, '&', TokenType.T_2Ampersand);

            // '@' followed by 'A' starts a Boolean identifier.
            AddTransition(TokenType.S_Start, '@', TokenType.S_At);
            AddTransition(TokenType.S_At, 'A', TokenType.T_BoolIdent);

            foreach (var identifierChar in identifierChars)
            {
                AddTransition(TokenType.T_BoolIdent, identifierChar, TokenType.T_BoolIdent);
            }
        }
        // Example no. 4
        // 0
        /// <summary>
        /// Fills the single-character token table, the quote-delimiter table and the
        /// comment delimiter according to the selected grammar.
        /// </summary>
        /// <param name="gs">Grammar selector that decides which entries are installed.</param>
        public InterpreterTokenizer(GrammarSelector gs)
        {
            // This dictionary is used to recognize single-character tokens.
            // Round brackets are registered first for every grammar (the JSON case clears them again).
            dictCharToTokenType['('] = TokenType.T_LeftBracket;
            dictCharToTokenType[')'] = TokenType.T_RightBracket;

            switch (gs)
            {
            case GrammarSelector.LISP:
            case GrammarSelector.Scheme:
            case GrammarSelector.SASL:
                dictCharToTokenType['\''] = TokenType.T_Apostrophe;
                dictQuoteDelimiterToTokenType['"'] = TokenType.T_StrLit;
                markQuotedTokens = true;
                break;

            case GrammarSelector.APL:
                dictCharToTokenType['\''] = TokenType.T_Apostrophe;
                // We cannot use ';' as the comment delimiter, since [;] is an APL operator.
                commentDelimiter = '#';
                break;

            case GrammarSelector.CLU:
                dictCharToTokenType['$'] = TokenType.T_Dollar;
                break;

            case GrammarSelector.Smalltalk:
                dictCharToTokenType['#'] = TokenType.T_Octothorpe;
                // ThAW 2014/02/03 : We want to recognize $; a sample Smalltalk character literal is $a (see page 319).
                dictCharToTokenType['$'] = TokenType.T_Dollar;
                // Strings are delimited by single quotes, not double quotes; see the example
                // string literal on page 319 of Kamin.
                dictQuoteDelimiterToTokenType['\''] = TokenType.T_StrLit;
                break;

            case GrammarSelector.Prolog2:
                dictCharToTokenType[','] = TokenType.T_Comma;
                dictCharToTokenType['.'] = TokenType.T_Dot;
                dictCharToTokenType['['] = TokenType.T_LeftSquareBracket;
                dictCharToTokenType[']'] = TokenType.T_RightSquareBracket;
                dictCharToTokenType['|'] = TokenType.T_OrBar;
                dictCharToTokenType[';'] = TokenType.T_Semicolon;
                dictCharToTokenType['{'] = TokenType.T_LeftCurlyBrace;
                dictCharToTokenType['}'] = TokenType.T_RightCurlyBrace;
                // Single quotes build identifiers that contain spaces or special characters.
                dictQuoteDelimiterToTokenType['\''] = TokenType.T_Ident;
                dictQuoteDelimiterToTokenType['"'] = TokenType.T_StrLit;
                // See http://users.cs.cf.ac.uk/O.F.Rana/prolog/lectureP2/node10.html
                commentDelimiter = '%';
                break;

            case GrammarSelector.JSON:
                // Start from an empty table: the round brackets registered above are dropped.
                dictCharToTokenType.Clear();
                dictCharToTokenType[','] = TokenType.T_Comma;
                dictCharToTokenType[':'] = TokenType.T_Colon;
                dictCharToTokenType['['] = TokenType.T_LeftSquareBracket;
                dictCharToTokenType[']'] = TokenType.T_RightSquareBracket;
                dictCharToTokenType['{'] = TokenType.T_LeftCurlyBrace;
                dictCharToTokenType['}'] = TokenType.T_RightCurlyBrace;
                dictQuoteDelimiterToTokenType['"'] = TokenType.T_StrLit;
                // Is it possible to place a comment in a real JSON expression?  What is the delimiter?
                break;

            default:
                // Any other grammar gets only the round-bracket entries.
                break;
            }
        }