/// <summary>
/// Resets the lexer builder and defines the static part of the state machine:
/// whitespace / end-of-line configuration, optional identifier states and optional
/// integer / double states. The resulting machine is stored in <c>LexerFsm</c>.
/// </summary>
/// <param name="idType">Identifier flavour forwarded to <c>InitializeIdentifier</c>.</param>
/// <param name="staticTokens">Generic tokens that decide which static states are created.</param>
private void InitializeStaticLexer(IdentifierType idType = IdentifierType.Alpha,
    params GenericToken[] staticTokens)
{
    FSMBuilder = new FSMLexerBuilder<GenericToken>();
    StringCounter = 0;

    // conf
    FSMBuilder.IgnoreWS()
        .WhiteSpace(' ')
        .WhiteSpace('\t')
        .IgnoreEOL();

    // start machine definition
    FSMBuilder.Mark(start);

    // Membership is tested directly on the array: copying it into a new list for every
    // single check only produced garbage.
    if (staticTokens.Contains(GenericToken.Identifier) || staticTokens.Contains(GenericToken.KeyWord))
    {
        InitializeIdentifier(idType);
    }

    // numeric
    var wantsDouble = staticTokens.Contains(GenericToken.Double);
    if (staticTokens.Contains(GenericToken.Int) || wantsDouble)
    {
        // The integer states are created even when only Double is requested, because the
        // double states below are chained right after them.
        FSMBuilder = FSMBuilder.GoTo(start)
            .RangeTransition('0', '9')
            .Mark(in_int)
            .RangeTransitionTo('0', '9', in_int)
            .End(GenericToken.Int);

        if (wantsDouble)
        {
            FSMBuilder.Transition('.')
                .Mark(start_double)
                .RangeTransition('0', '9')
                .Mark(in_double)
                .RangeTransitionTo('0', '9', in_double)
                .End(GenericToken.Double);
        }
    }

    LexerFsm = FSMBuilder.Fsm;
}
/// <summary>
/// Resets the lexer builder, applies the whitespace / end-of-line settings taken from
/// <paramref name="config"/>, then adds identifier and numeric states depending on the
/// content of <paramref name="staticTokens"/>. The built machine ends up in <c>LexerFsm</c>.
/// </summary>
/// <param name="config">Source of the <c>IgnoreWS</c>, <c>WhiteSpace</c> and <c>IgnoreEOL</c>
/// settings; also forwarded to <c>InitializeIdentifier</c>.</param>
/// <param name="staticTokens">Generic tokens that decide which static states are created.</param>
private void InitializeStaticLexer(Config config, GenericToken[] staticTokens)
{
    FSMBuilder = new FSMLexerBuilder<GenericToken>();
    StringCounter = 0;

    // conf
    var configured = FSMBuilder.IgnoreWS(config.IgnoreWS);
    configured = configured.WhiteSpace(config.WhiteSpace);
    configured.IgnoreEOL(config.IgnoreEOL);

    // start machine definition
    FSMBuilder.Mark(start);

    // identifiers are needed for plain identifiers as well as for keywords
    var identifierNeeded = staticTokens.Contains(GenericToken.Identifier)
                           || staticTokens.Contains(GenericToken.KeyWord);
    if (identifierNeeded)
    {
        InitializeIdentifier(config);
    }

    // numeric
    var intRequested = staticTokens.Contains(GenericToken.Int);
    var doubleRequested = staticTokens.Contains(GenericToken.Double);
    if (intRequested || doubleRequested)
    {
        var digits = FSMBuilder.GoTo(start).RangeTransition('0', '9');
        var integerLoop = digits.Mark(in_int).RangeTransitionTo('0', '9', in_int);
        FSMBuilder = integerLoop.End(GenericToken.Int);

        if (doubleRequested)
        {
            // chained right after the integer states defined just above
            FSMBuilder.Transition('.')
                .Mark(start_double)
                .RangeTransition('0', '9')
                .Mark(in_double)
                .RangeTransitionTo('0', '9', in_double)
                .End(GenericToken.Double);
        }
    }

    LexerFsm = FSMBuilder.Fsm;
}
/// <summary>
/// Registers a character-literal lexeme for <paramref name="token"/>. The delimiter and the
/// escape character are validated first; every violation is recorded on
/// <paramref name="result"/> as a FATAL <c>InitializationError</c> instead of being thrown.
/// The states for the literal are then added to the lexer state machine.
/// </summary>
/// <param name="token">Token stored under <c>DerivedToken</c> in the match properties.</param>
/// <param name="result">Build result that collects the initialization errors.</param>
/// <param name="charDelimiter">Delimiter of the literal; must be exactly one character and
/// neither a letter nor a digit.</param>
/// <param name="escapeDelimiterChar">Escape character; same constraints as the delimiter.</param>
public void AddCharLexem(IN token, BuildResult<ILexer<IN>> result, string charDelimiter,
    string escapeDelimiterChar = "\\")
{
    if (string.IsNullOrEmpty(charDelimiter) || charDelimiter.Length > 1)
    {
        // NOTE(review): a delimiter problem is reported with the ESCAPE_CHAR error code here;
        // looks like a copy/paste slip - confirm whether a dedicated delimiter code exists.
        result.AddError(new InitializationError(ErrorLevel.FATAL,
            $"bad lexem {charDelimiter} : CharToken lexeme delimiter char <{token.ToString()}> must be 1 character length.",
            ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_MUST_BE_1_CHAR));
    }
    else if (char.IsLetterOrDigit(charDelimiter[0]))
    {
        // Reaching this branch means the delimiter is exactly one character long, so the
        // indexer is safe (the previous form dereferenced a possibly null string).
        result.AddError(new InitializationError(ErrorLevel.FATAL,
            $"bad lexem {charDelimiter} : CharToken lexeme delimiter char <{token.ToString()}> can not start with a letter or digit.",
            ErrorCodes.LEXER_CHAR_DELIMITER_CANNOT_BE_LETTER));
    }

    if (string.IsNullOrEmpty(escapeDelimiterChar) || escapeDelimiterChar.Length > 1)
    {
        result.AddError(new InitializationError(ErrorLevel.FATAL,
            $"bad lexem {escapeDelimiterChar} : CharToken lexeme escape char <{token.ToString()}> must be 1 character length.",
            ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_MUST_BE_1_CHAR));
    }
    else if (char.IsLetterOrDigit(escapeDelimiterChar[0]))
    {
        result.AddError(new InitializationError(ErrorLevel.FATAL,
            $"bad lexem {escapeDelimiterChar} : CharToken lexeme escape char lexeme <{token.ToString()}> can not start with a letter or digit.",
            ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_CANNOT_BE_LETTER_OR_DIGIT));
    }

    // Both strings are indexed below. With a null or empty one there is no character to build
    // from; the matching FATAL error is already recorded, so stop instead of crashing.
    if (string.IsNullOrEmpty(charDelimiter) || string.IsNullOrEmpty(escapeDelimiterChar))
    {
        return;
    }

    CharCounter++;

    var charDelimiterChar = charDelimiter[0];
    var escapeChar = escapeDelimiterChar[0];

    // State names carry the counter so that every registered char lexeme gets its own states.
    var startState = start_char + "_" + CharCounter;
    var inState = in_char + "_" + CharCounter;
    var endState = end_char + "_" + CharCounter;
    var escapeState = escapeChar_char + "_" + CharCounter;
    var unicodeState = unicode_char + "_" + CharCounter;

    NodeCallback<GenericToken> callback = match =>
    {
        match.Properties[DerivedToken] = token;
        var value = match.Result.SpanValue;
        match.Result.SpanValue = value;
        return match;
    };

    FSMBuilder.GoTo(start);
    FSMBuilder.Transition(charDelimiterChar)
        .Mark(startState)
        .ExceptTransition(new[] { charDelimiterChar, escapeChar })
        .Mark(inState)
        .Transition(charDelimiterChar)
        .Mark(endState)
        .End(GenericToken.Char)
        .CallBack(callback)
        .GoTo(startState)
        .Transition(escapeChar)
        .Mark(escapeState)
        .ExceptTransitionTo(new[] { 'u' }, inState)
        .CallBack(callback);
    FSMBuilder.Fsm.StringDelimiter = charDelimiterChar;

    // TODO : unicode transitions
    FSMBuilder = FSMBuilder.GoTo(escapeState)
        .Transition('u')
        .Mark(unicodeState)
        .RepetitionTransitionTo(inState, 4, "[0-9,a-z,A-Z]");
}
/// <summary>
/// Manual smoke test: builds a small JSON-like lexer with <c>FSMLexerBuilder</c>, runs it
/// over a hard-coded sample and writes every recognised token to the console.
/// </summary>
private static void testLexerBuilder()
{
    var builder = new FSMLexerBuilder<JsonToken>();

    // conf
    builder.IgnoreWS()
        .WhiteSpace(' ')
        .WhiteSpace('\t')
        .IgnoreEOL();

    // start machine definition
    builder.Mark("start");

    // string literal; the callback upper-cases the matched value
    builder.Transition('\"')
        .Mark("in_string")
        .ExceptTransitionTo(new[] { '\"', '\\' }, "in_string")
        .Transition('\\')
        .Mark("escape")
        .AnyTransitionTo(' ', "in_string")
        .Transition('\"')
        .End(JsonToken.STRING)
        .Mark("string_end")
        .CallBack(match =>
        {
            match.Result.Value = match.Result.Value.ToUpper();
            return match;
        });

    // braces
    builder.GoTo("start")
        .Transition('{')
        .End(JsonToken.ACCG);
    builder.GoTo("start")
        .Transition('}')
        .End(JsonToken.ACCD);

    // square brackets
    builder.GoTo("start")
        .Transition('[')
        .End(JsonToken.CROG);
    builder.GoTo("start")
        .Transition(']')
        .End(JsonToken.CROD);

    // colon
    builder.GoTo("start")
        .Transition(':')
        .End(JsonToken.COLON);

    // comma
    builder.GoTo("start")
        .Transition(',')
        .End(JsonToken.COMMA);

    // numeric
    builder.GoTo("start")
        .RangeTransition('0', '9')
        .Mark("in_int")
        .RangeTransitionTo('0', '9', "in_int")
        .End(JsonToken.INT)
        .Transition('.')
        .Mark("start_double")
        .RangeTransition('0', '9')
        .Mark("in_double")
        .RangeTransitionTo('0', '9', "in_double")
        .End(JsonToken.DOUBLE);

    var code = "{\n\"d\" : 42.42 ,\n\"i\" : 42 ,\n\"s\" : \"quarante-deux\",\n\"s2\":\"a\\\"b\"\n}";

    var lex = builder.Fsm;
    var r = lex.Run(code, 0);

    // Each token is printed as soon as it is produced. The string that used to accumulate
    // all messages was never read, so it is no longer built.
    while (r.IsSuccess)
    {
        Console.WriteLine($"{r.Result.TokenID} : {r.Result.Value} @{r.Result.Position}");
        r = lex.Run(code);
    }
}
/// <summary>
/// Registers a character-literal lexeme for <paramref name="token"/>. The delimiter and the
/// escape character must each be exactly one character that is neither a letter nor a digit;
/// otherwise an <c>InvalidLexerException</c> is thrown before the state machine is touched.
/// </summary>
/// <param name="token">Token stored under <c>DerivedToken</c> in the match properties.</param>
/// <param name="charDelimiter">Delimiter of the character literal.</param>
/// <param name="escapeDelimiterChar">Escape character used inside the literal.</param>
public void AddCharLexem(IN token, string charDelimiter, string escapeDelimiterChar = "\\")
{
    // --- validation: delimiter first, then escape character ---
    if (charDelimiter == null || charDelimiter.Length != 1)
    {
        throw new InvalidLexerException(
            $"bad lexem {charDelimiter} : CharToken lexeme delimiter char <{token.ToString()}> must be 1 character length.");
    }

    var delimiter = charDelimiter[0];
    if (char.IsLetterOrDigit(delimiter))
    {
        throw new InvalidLexerException(
            $"bad lexem {charDelimiter} : CharToken lexeme delimiter char <{token.ToString()}> can not start with a letter.");
    }

    if (escapeDelimiterChar == null || escapeDelimiterChar.Length != 1)
    {
        throw new InvalidLexerException(
            $"bad lexem {escapeDelimiterChar} : CharToken lexeme escape char <{token.ToString()}> must be 1 character length.");
    }

    var escape = escapeDelimiterChar[0];
    if (char.IsLetterOrDigit(escape))
    {
        throw new InvalidLexerException(
            $"bad lexem {escapeDelimiterChar} : CharToken lexeme escape char lexeme <{token.ToString()}> can not start with a letter.");
    }

    // --- state names, suffixed with the counter of this char lexeme ---
    CharCounter++;
    var suffix = "_" + CharCounter;
    var openedState = start_char + suffix;
    var contentState = in_char + suffix;
    var closedState = end_char + suffix;
    var escapedState = escapeChar_char + suffix;
    var unicodeState = unicode_char + suffix;

    NodeCallback<GenericToken> tagWithToken = m =>
    {
        m.Properties[DerivedToken] = token;
        var span = m.Result.SpanValue;
        m.Result.SpanValue = span;
        return m;
    };

    // --- delimiter, one character (plain or escaped), delimiter ---
    FSMBuilder.GoTo(start);
    FSMBuilder.Transition(delimiter)
        .Mark(openedState)
        .ExceptTransition(new[] { delimiter, escape })
        .Mark(contentState)
        .Transition(delimiter)
        .Mark(closedState)
        .End(GenericToken.Char)
        .CallBack(tagWithToken)
        .GoTo(openedState)
        .Transition(escape)
        .Mark(escapedState)
        .ExceptTransitionTo(new[] { 'u' }, contentState)
        .CallBack(tagWithToken);
    FSMBuilder.Fsm.StringDelimiter = delimiter;

    // TODO : unicode transitions
    FSMBuilder = FSMBuilder.GoTo(escapedState)
        .Transition('u')
        .Mark(unicodeState)
        .RepetitionTransitionTo(contentState, 4, "[0-9,a-z,A-Z]");
}