/// <summary>
/// Factory method: instantiates the tokenizer class that handles the requested grammar.
/// </summary>
/// <param name="gs">The grammar for which a tokenizer is required.</param>
/// <returns>
/// A <c>Tokenizer</c> for Micro and Inference; an <c>Inference.Interpreter.InterpreterTokenizer</c>
/// for the interpreter grammars and JSON; for Prolog2, a <c>Prolog2Tokenizer</c>
/// (or an <c>InterpreterTokenizer</c> when the DEAD_CODE symbol is defined).
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="gs"/> is not one of the values handled below.
/// </exception>
public static ITokenizer Create(GrammarSelector gs)
{
    switch (gs)
    {
        case GrammarSelector.Micro:
        case GrammarSelector.Inference:
            return new Tokenizer(gs);

        case GrammarSelector.InterpreterChapter1:
        case GrammarSelector.LISP:
        case GrammarSelector.APL:
        case GrammarSelector.Scheme:
        case GrammarSelector.SASL:
        case GrammarSelector.CLU:
        case GrammarSelector.Smalltalk:
        case GrammarSelector.Prolog:
        case GrammarSelector.JSON:
            return new Inference.Interpreter.InterpreterTokenizer(gs);

        case GrammarSelector.Prolog2:
            // Which tokenizer serves Prolog2 is decided at compile time.
#if DEAD_CODE
            return new Inference.Interpreter.InterpreterTokenizer(gs);
#else
            return new Prolog2Tokenizer();
#endif

        default:
            // Any selector not listed above is rejected.
            throw new ArgumentException("TokenizerFactory.Create() : Unrecognized GrammarSelector: " + gs.ToString(), "gs");
    }
}
/// <summary>
/// Initializes the finite-state tokenizer: records the grammar selector, registers the
/// string-literal bookkeeping, the set of acceptable token types, and the base
/// state-transition table.
/// </summary>
/// <param name="gs">The grammar selector; stored in <c>this.gs</c>.</param>
public FSMTokenizer(GrammarSelector gs)
{
    this.gs = gs;

    // Associate the "inside a string literal" state with its delimiter and with the
    // state that represents the completed literal.
    dictInternalStringStateToDelimiter[TokenType.S_StrLitOpen] = cStringDelimiter;
    dictInternalStringStateToCompletedState[TokenType.S_StrLitOpen] = TokenType.T_StrLit;

    // Token types accepted by this tokenizer.
    // T_LessEqual is deliberately left out here: in Prolog, it's =<, not <=
    TokenType[] baseAcceptableTokens =
    {
        TokenType.T_IntLit,
        TokenType.T_FltLit,
        TokenType.T_StrLit,
        TokenType.T_Ident,
        TokenType.T_Mult,
        TokenType.T_Div,
        TokenType.T_Plus,
        TokenType.T_Minus,
        TokenType.T_Equal,
        TokenType.T_NotEqual,
        TokenType.T_Less,
        TokenType.T_Greater,
        TokenType.T_GreaterEqual,
        TokenType.T_Semicolon,
        TokenType.T_Comma,
        TokenType.T_LeftBracket,
        TokenType.T_RightBracket,
        TokenType.T_Arrow,
        TokenType.T_EOF
    };

    foreach (TokenType tokenType in baseAcceptableTokens)
    {
        acceptableTokens.Add(tokenType);
    }

    // Transitions leaving the start state.
    // NOTE(review): 'A' and '0' presumably stand for "any letter" and "any digit";
    // confirm against AddTransition / the character classifier.
    AddTransition(TokenType.S_Start, 'A', TokenType.T_Ident);
    AddTransition(TokenType.S_Start, '0', TokenType.T_IntLit);
    AddTransition(TokenType.S_Start, cStringDelimiter, TokenType.S_StrLitOpen);
    AddTransition(TokenType.S_Start, '*', TokenType.T_Mult);
    AddTransition(TokenType.S_Start, '/', TokenType.T_Div);
    AddTransition(TokenType.S_Start, '+', TokenType.T_Plus);
    AddTransition(TokenType.S_Start, '-', TokenType.T_Minus);
    AddTransition(TokenType.S_Start, '=', TokenType.T_Equal);
    AddTransition(TokenType.S_Start, '<', TokenType.T_Less);
    AddTransition(TokenType.S_Start, '>', TokenType.T_Greater);
    AddTransition(TokenType.S_Start, ':', TokenType.T_Colon);
    AddTransition(TokenType.S_Start, ';', TokenType.T_Semicolon);
    AddTransition(TokenType.S_Start, ',', TokenType.T_Comma);
    AddTransition(TokenType.S_Start, '|', TokenType.T_OrBar);
    AddTransition(TokenType.S_Start, '(', TokenType.T_LeftBracket);
    AddTransition(TokenType.S_Start, ')', TokenType.T_RightBracket);

    // An identifier stays an identifier on 'A', '0' and '_'.
    char[] identifierTailChars = { 'A', '0', '_' };

    foreach (char tailChar in identifierTailChars)
    {
        AddTransition(TokenType.T_Ident, tailChar, TokenType.T_Ident);
    }

    // Numeric literals: an integer literal followed by '.' and then '0' becomes a float literal.
    AddTransition(TokenType.T_IntLit, '0', TokenType.T_IntLit);
    AddTransition(TokenType.T_IntLit, '.', TokenType.S_IntLitDot);
    AddTransition(TokenType.S_IntLitDot, '0', TokenType.T_FltLit);
    AddTransition(TokenType.T_FltLit, '0', TokenType.T_FltLit);

    // String literals: the delimiter completes an open literal, and a delimiter seen
    // right after a completed literal moves back to the open state.
    // NOTE(review): the second transition looks like doubled-delimiter escaping; confirm.
    AddTransition(TokenType.S_StrLitOpen, cStringDelimiter, TokenType.T_StrLit);
    AddTransition(TokenType.T_StrLit, cStringDelimiter, TokenType.S_StrLitOpen);

    // Tokens that extend a shorter token: '-' then '0' is an integer literal,
    // "->" is T_Arrow, and ">=" is T_GreaterEqual.
    AddTransition(TokenType.T_Minus, '0', TokenType.T_IntLit);
    AddTransition(TokenType.T_Minus, '>', TokenType.T_Arrow);
    AddTransition(TokenType.T_Greater, '=', TokenType.T_GreaterEqual);
}
/// <summary>
/// Extends the base FSM tokenizer with the assignment token and, for the Inference
/// grammar only, with additional token types and transitions.
/// </summary>
/// <param name="gs">The grammar selector; forwarded to the base constructor.</param>
public Tokenizer(GrammarSelector gs)
    : base(gs)
{
    // := is a Micro token.
    acceptableTokens.Add(TokenType.T_Assign);
    AddTransition(TokenType.T_Colon, '=', TokenType.T_Assign);

    // Everything below applies to the Inference grammar only.
    if (gs != GrammarSelector.Inference)
    {
        return;
    }

    TokenType[] inferenceAcceptableTokens =
    {
        TokenType.T_BoolIdent,
        TokenType.T_SkolemIdent,
        TokenType.T_2OrBar,
        TokenType.T_2Ampersand,
        TokenType.T_Exclamation,
        TokenType.T_Variable,
        TokenType.T_LessEqual
    };

    foreach (TokenType tokenType in inferenceAcceptableTokens)
    {
        acceptableTokens.Add(tokenType);
    }

    // Characters on which an identifier-like token remains in its own state.
    char[] identifierTailChars = { 'A', '0', '_' };

    // Additional single-character entries out of the start state.
    AddTransition(TokenType.S_Start, '&', TokenType.S_Ampersand);
    AddTransition(TokenType.S_Start, '?', TokenType.S_Question);
    AddTransition(TokenType.S_Start, '!', TokenType.T_Exclamation);
    AddTransition(TokenType.S_Start, '$', TokenType.S_Dollar);

    // '$' followed by 'A' starts a Skolem identifier.
    AddTransition(TokenType.S_Dollar, 'A', TokenType.T_SkolemIdent);

    foreach (char tailChar in identifierTailChars)
    {
        AddTransition(TokenType.T_SkolemIdent, tailChar, TokenType.T_SkolemIdent);
    }

    // "<=" is T_LessEqual.
    AddTransition(TokenType.T_Less, '=', TokenType.T_LessEqual);

    // '?' followed by 'A' starts a variable.
    AddTransition(TokenType.S_Question, 'A', TokenType.T_Variable);

    foreach (char tailChar in identifierTailChars)
    {
        AddTransition(TokenType.T_Variable, tailChar, TokenType.T_Variable);
    }

    // "||" and "&&" are the two-character tokens T_2OrBar and T_2Ampersand.
    AddTransition(TokenType.T_OrBar, '|', TokenType.T_2OrBar);
    AddTransition(TokenType.S_Ampersand, '&', TokenType.T_2Ampersand);

    // '@' followed by 'A' starts a Boolean identifier.
    AddTransition(TokenType.S_Start, '@', TokenType.S_At);
    AddTransition(TokenType.S_At, 'A', TokenType.T_BoolIdent);

    foreach (char tailChar in identifierTailChars)
    {
        AddTransition(TokenType.T_BoolIdent, tailChar, TokenType.T_BoolIdent);
    }
}
/// <summary>
/// Configures the tokenizer's lookup tables (single-character tokens, quote delimiters),
/// the quoted-token flag and the comment delimiter according to the selected grammar.
/// </summary>
/// <param name="gs">The grammar whose lexical conventions are to be applied.</param>
public InterpreterTokenizer(GrammarSelector gs)
{
    // This dictionary is used to recognize single-character tokens.
    // Parentheses are registered first; the JSON case below clears them again.
    dictCharToTokenType['('] = TokenType.T_LeftBracket;
    dictCharToTokenType[')'] = TokenType.T_RightBracket;

    switch (gs)
    {
        case GrammarSelector.LISP:
        case GrammarSelector.Scheme:
        case GrammarSelector.SASL:
            dictCharToTokenType['\''] = TokenType.T_Apostrophe;
            dictQuoteDelimiterToTokenType['"'] = TokenType.T_StrLit;
            markQuotedTokens = true;
            break;

        case GrammarSelector.APL:
            dictCharToTokenType['\''] = TokenType.T_Apostrophe;
            // We cannot use ';', since [;] is an APL operator.
            commentDelimiter = '#';
            break;

        case GrammarSelector.CLU:
            dictCharToTokenType['$'] = TokenType.T_Dollar;
            break;

        case GrammarSelector.Smalltalk:
            dictCharToTokenType['#'] = TokenType.T_Octothorpe;
            // ThAW 2014/02/03 : We want to recognize $; a sample Smalltalk character literal is $a (see page 319).
            dictCharToTokenType['$'] = TokenType.T_Dollar;
            // Use single quotes, not double quotes, as the string delimiter.
            // See the example string literal on page 319 of Kamin.
            dictQuoteDelimiterToTokenType['\''] = TokenType.T_StrLit;
            break;

        case GrammarSelector.Prolog2:
            dictCharToTokenType[','] = TokenType.T_Comma;
            dictCharToTokenType['.'] = TokenType.T_Dot;
            dictCharToTokenType['['] = TokenType.T_LeftSquareBracket;
            dictCharToTokenType[']'] = TokenType.T_RightSquareBracket;
            dictCharToTokenType['|'] = TokenType.T_OrBar;
            dictCharToTokenType[';'] = TokenType.T_Semicolon;
            dictCharToTokenType['{'] = TokenType.T_LeftCurlyBrace;
            dictCharToTokenType['}'] = TokenType.T_RightCurlyBrace;
            // For constructing identifiers that contain spaces or special characters.
            dictQuoteDelimiterToTokenType['\''] = TokenType.T_Ident;
            dictQuoteDelimiterToTokenType['"'] = TokenType.T_StrLit;
            // See http://users.cs.cf.ac.uk/O.F.Rana/prolog/lectureP2/node10.html
            commentDelimiter = '%';
            break;

        case GrammarSelector.JSON:
            // Start from an empty single-character table: this removes the parentheses
            // registered above.
            dictCharToTokenType.Clear();
            dictCharToTokenType[','] = TokenType.T_Comma;
            dictCharToTokenType[':'] = TokenType.T_Colon;
            dictCharToTokenType['['] = TokenType.T_LeftSquareBracket;
            dictCharToTokenType[']'] = TokenType.T_RightSquareBracket;
            dictCharToTokenType['{'] = TokenType.T_LeftCurlyBrace;
            dictCharToTokenType['}'] = TokenType.T_RightCurlyBrace;
            dictQuoteDelimiterToTokenType['"'] = TokenType.T_StrLit;
            // Is it possible to place a comment in a real JSON expression? What is the delimiter?
            break;

        default:
            // Every other grammar keeps only the parenthesis tokens registered above.
            break;
    }
}