/// <summary>
/// Rebuilds the static part of the lexer state machine: whitespace / end-of-line
/// handling, identifier states (when requested) and numeric states (when requested).
/// The resulting machine is stored in <c>LexerFsm</c>.
/// </summary>
/// <param name="idType">Identifier flavour, forwarded to <c>InitializeIdentifier</c>.</param>
/// <param name="staticTokens">Generic tokens the lexer has to recognise.</param>
private void InitializeStaticLexer(IdentifierType idType = IdentifierType.Alpha, params GenericToken[] staticTokens)
{
    FSMBuilder = new FSMLexerBuilder<GenericToken>();
    StringCounter = 0;

    // conf
    FSMBuilder.IgnoreWS()
        .WhiteSpace(' ')
        .WhiteSpace('\t')
        .IgnoreEOL();

    // start machine definition
    FSMBuilder.Mark(start);

    // Membership is tested directly on the array (as the Config-based overload does)
    // instead of copying it into a new List for every single check.
    if (staticTokens.Contains(GenericToken.Identifier) || staticTokens.Contains(GenericToken.KeyWord))
    {
        InitializeIdentifier(idType);
    }

    // numeric
    bool needsDouble = staticTokens.Contains(GenericToken.Double);
    if (needsDouble || staticTokens.Contains(GenericToken.Int))
    {
        // The integer states are always defined, even when only Double was requested:
        // the double definition below is chained onto them.
        FSMBuilder = FSMBuilder.GoTo(start)
            .RangeTransition('0', '9')
            .Mark(in_int)
            .RangeTransitionTo('0', '9', in_int)
            .End(GenericToken.Int);

        if (needsDouble)
        {
            // Continues from wherever the builder was left by the integer definition.
            FSMBuilder.Transition('.')
                .Mark(start_double)
                .RangeTransition('0', '9')
                .Mark(in_double)
                .RangeTransitionTo('0', '9', in_double)
                .End(GenericToken.Double);
        }
    }

    LexerFsm = FSMBuilder.Fsm;
}
/// <summary>
/// Config-driven variant: resets the builder, applies the whitespace / end-of-line
/// settings carried by <paramref name="config"/>, then declares the identifier and
/// numeric states that <paramref name="staticTokens"/> asks for. The finished
/// machine ends up in <c>LexerFsm</c>.
/// </summary>
/// <param name="config">Lexer settings (whitespace handling, identifier definition).</param>
/// <param name="staticTokens">Generic tokens the lexer has to recognise.</param>
private void InitializeStaticLexer(Config config, GenericToken[] staticTokens)
{
    FSMBuilder = new FSMLexerBuilder<GenericToken>();
    StringCounter = 0;

    // Global settings come first, then the entry state of the machine.
    FSMBuilder
        .IgnoreWS(config.IgnoreWS)
        .WhiteSpace(config.WhiteSpace)
        .IgnoreEOL(config.IgnoreEOL);
    FSMBuilder.Mark(start);

    // Identifiers are needed for plain identifiers and for keywords alike.
    bool identifiersRequested =
        staticTokens.Contains(GenericToken.Identifier)
        || staticTokens.Contains(GenericToken.KeyWord);
    if (identifiersRequested)
    {
        InitializeIdentifier(config);
    }

    bool doublesRequested = staticTokens.Contains(GenericToken.Double);
    bool numbersRequested = doublesRequested || staticTokens.Contains(GenericToken.Int);

    if (numbersRequested)
    {
        // Integer part: one digit to enter in_int, then any number of further digits.
        FSMBuilder = FSMBuilder
            .GoTo(start)
            .RangeTransition('0', '9')
            .Mark(in_int)
            .RangeTransitionTo('0', '9', in_int)
            .End(GenericToken.Int);

        if (doublesRequested)
        {
            // Fractional part, chained right after the integer definition.
            FSMBuilder
                .Transition('.')
                .Mark(start_double)
                .RangeTransition('0', '9')
                .Mark(in_double)
                .RangeTransitionTo('0', '9', in_double)
                .End(GenericToken.Double);
        }
    }

    LexerFsm = FSMBuilder.Fsm;
}
/// <summary>
/// Declares the identifier states of the lexer machine according to
/// <c>config.IdType</c>: either one of the built-in flavours (alpha, alphanumeric,
/// alphanumeric with '_' and '-') or a custom one described by
/// <c>config.IdentifierStartPattern</c> / <c>config.IdentifierRestPattern</c>.
/// </summary>
/// <param name="config">Lexer settings carrying the identifier definition.</param>
private void InitializeIdentifier(Config config)
{
    var idType = config.IdType;

    if (idType != IdentifierType.Custom)
    {
        // Built-in flavours all share the alphabetic core.
        // NOTE: the order of the calls below matters, it is kept exactly as it was.
        FSMBuilder
            .GoTo(start)
            .RangeTransition('a', 'z')
            .Mark(in_identifier)
            .GoTo(start)
            .RangeTransitionTo('A', 'Z', in_identifier)
            .RangeTransitionTo('a', 'z', in_identifier)
            .RangeTransitionTo('A', 'Z', in_identifier)
            .End(GenericToken.Identifier);

        bool withDash = idType == IdentifierType.AlphaNumericDash;

        if (withDash || idType == IdentifierType.AlphaNumeric)
        {
            // Digits are accepted once inside an identifier.
            FSMBuilder
                .GoTo(in_identifier)
                .RangeTransitionTo('0', '9', in_identifier);
        }

        if (withDash)
        {
            FSMBuilder
                .GoTo(start)
                .TransitionTo('_', in_identifier)
                .TransitionTo('_', in_identifier)
                .TransitionTo('-', in_identifier);
        }

        return;
    }

    // Custom identifiers. A pattern of length 1 is a single character, any other
    // pattern is read as a [first, second] character range.
    var identifierMarked = false;
    foreach (var startPattern in config.IdentifierStartPattern)
    {
        FSMBuilder.GoTo(start);

        if (identifierMarked)
        {
            // in_identifier already exists: just add another way into it.
            if (startPattern.Length == 1)
            {
                FSMBuilder.TransitionTo(startPattern[0], in_identifier);
            }
            else
            {
                FSMBuilder.RangeTransitionTo(startPattern[0], startPattern[1], in_identifier);
            }

            continue;
        }

        // First start pattern: it creates and marks the in_identifier state.
        if (startPattern.Length == 1)
        {
            FSMBuilder.Transition(startPattern[0]).Mark(in_identifier).End(GenericToken.Identifier);
        }
        else
        {
            FSMBuilder.RangeTransition(startPattern[0], startPattern[1]).Mark(in_identifier).End(GenericToken.Identifier);
        }

        identifierMarked = true;
    }

    // Characters allowed after the first one.
    foreach (var restPattern in config.IdentifierRestPattern)
    {
        if (restPattern.Length == 1)
        {
            FSMBuilder.TransitionTo(restPattern[0], in_identifier);
        }
        else
        {
            FSMBuilder.RangeTransitionTo(restPattern[0], restPattern[1], in_identifier);
        }
    }
}
/// <summary>
/// Adds a string lexeme to the lexer machine. Two shapes are supported:
/// a delimiter with a distinct escape character (e.g. <c>"</c> and <c>\</c>), or a
/// delimiter that escapes itself by being doubled (delimiter == escape character).
/// </summary>
/// <param name="token">User token attached to the match through the <c>DerivedToken</c> property.</param>
/// <param name="stringDelimiter">Exactly one character, neither a letter nor a digit.</param>
/// <param name="escapeDelimiterChar">Exactly one character, neither a letter nor a digit; defaults to a backslash.</param>
/// <exception cref="InvalidLexerException">
/// Thrown when the delimiter or the escape string is null, empty, longer than one
/// character, or is a letter or digit.
/// </exception>
public void AddStringLexem(IN token, string stringDelimiter, string escapeDelimiterChar = "\\")
{
    if (string.IsNullOrEmpty(stringDelimiter) || stringDelimiter.Length > 1)
    {
        throw new InvalidLexerException($"bad lexem {stringDelimiter} : StringToken lexeme delimiter char <{token.ToString()}> must be 1 character length.");
    }

    if (char.IsLetterOrDigit(stringDelimiter[0]))
    {
        throw new InvalidLexerException($"bad lexem {stringDelimiter} : StringToken lexeme delimiter char <{token.ToString()}> can not start with a letter.");
    }

    if (string.IsNullOrEmpty(escapeDelimiterChar) || escapeDelimiterChar.Length > 1)
    {
        throw new InvalidLexerException($"bad lexem {escapeDelimiterChar} : StringToken lexeme escape char <{token.ToString()}> must be 1 character length.");
    }

    if (char.IsLetterOrDigit(escapeDelimiterChar[0]))
    {
        throw new InvalidLexerException($"bad lexem {escapeDelimiterChar} : StringToken lexeme escape char lexeme <{token.ToString()}> can not start with a letter.");
    }

    StringCounter++;

    // Per-call snapshots. The instance members are overwritten by every call, so
    // anything that outlives this call (the lambdas below) must not read them.
    char delimiterChar = stringDelimiter[0];
    char escapeChar = escapeDelimiterChar[0];
    StringDelimiterChar = delimiterChar;
    EscapeStringDelimiterChar = escapeChar;

    NodeCallback<GenericToken> callback = (FSMMatch<GenericToken> match) =>
    {
        match.Properties[DerivedToken] = token;
        // NOTE(review): the value is read and written back unchanged; this looks like
        // a no-op but is kept in case the setter has an effect - confirm before removing.
        string value = match.Result.Value;
        match.Result.Value = value;
        return match;
    };

    if (delimiterChar != escapeChar)
    {
        // State names are suffixed with the counter so every string lexeme gets its own states.
        var inStringMark = in_string + StringCounter;

        FSMBuilder.GoTo(start);
        FSMBuilder.Transition(delimiterChar)
            .Mark(inStringMark)
            .ExceptTransitionTo(new char[] { delimiterChar, escapeChar }, inStringMark)
            .Transition(escapeChar)
            .Mark(escape_string + StringCounter)
            .AnyTransitionTo(' ', inStringMark)
            .Transition(delimiterChar)
            .End(GenericToken.String)
            .Mark(string_end + StringCounter)
            .CallBack(callback);
        FSMBuilder.Fsm.StringDelimiter = delimiterChar;
    }
    else
    {
        // Delimiter escapes itself: a doubled delimiter stands for one literal delimiter.
        var doubledDelimiter = new string(delimiterChar, 2);

        // Uses the captured local (not the instance member) so the action keeps working
        // with THIS lexeme's delimiter after later AddStringLexem calls, and compares
        // ordinally because delimiters are raw characters, not linguistic text.
        NodeAction collapseDelimiter = (string value) =>
        {
            if (value.EndsWith(doubledDelimiter, StringComparison.Ordinal))
            {
                return value.Substring(0, value.Length - 2) + delimiterChar;
            }

            return value;
        };

        var exceptDelimiter = new char[] { delimiterChar };
        var sameInString = "in_string_same" + StringCounter;
        var sameEscaped = "escaped_same" + StringCounter;
        var sameDelim = "delim_same" + StringCounter;

        FSMBuilder.GoTo(start)
            .Transition(delimiterChar)
            .Mark(sameInString)
            .ExceptTransitionTo(exceptDelimiter, sameInString)
            .Transition(delimiterChar)
            .Mark(sameEscaped)
            .End(GenericToken.String)
            .CallBack(callback)
            .Transition(delimiterChar)
            .Mark(sameDelim)
            .Action(collapseDelimiter)
            .ExceptTransitionTo(exceptDelimiter, sameInString);

        FSMBuilder.GoTo(sameDelim)
            .TransitionTo(delimiterChar, sameEscaped)
            .ExceptTransitionTo(exceptDelimiter, sameInString);
    }
}
/// <summary>
/// Manual smoke test: builds a small JSON-like lexer machine by hand, runs it over
/// an inline sample and writes every recognised token to the console.
/// </summary>
private static void testLexerBuilder()
{
    var builder = new FSMLexerBuilder<JsonToken>();

    // conf
    builder.IgnoreWS()
        .WhiteSpace(' ')
        .WhiteSpace('\t')
        .IgnoreEOL();

    // start machine definition
    builder.Mark("start");

    // string literal (the callback upper-cases the matched value)
    builder.Transition('\"')
        .Mark("in_string")
        .ExceptTransitionTo(new[] { '\"', '\\' }, "in_string")
        .Transition('\\')
        .Mark("escape")
        .AnyTransitionTo(' ', "in_string")
        .Transition('\"')
        .End(JsonToken.STRING)
        .Mark("string_end")
        .CallBack(match =>
        {
            match.Result.Value = match.Result.Value.ToUpper();
            return match;
        });

    // braces
    builder.GoTo("start")
        .Transition('{')
        .End(JsonToken.ACCG);
    builder.GoTo("start")
        .Transition('}')
        .End(JsonToken.ACCD);

    // brackets
    builder.GoTo("start")
        .Transition('[')
        .End(JsonToken.CROG);
    builder.GoTo("start")
        .Transition(']')
        .End(JsonToken.CROD);

    // colon
    builder.GoTo("start")
        .Transition(':')
        .End(JsonToken.COLON);

    // comma
    builder.GoTo("start")
        .Transition(',')
        .End(JsonToken.COMMA);

    // numeric
    builder.GoTo("start")
        .RangeTransition('0', '9')
        .Mark("in_int")
        .RangeTransitionTo('0', '9', "in_int")
        .End(JsonToken.INT)
        .Transition('.')
        .Mark("start_double")
        .RangeTransition('0', '9')
        .Mark("in_double")
        .RangeTransitionTo('0', '9', "in_double")
        .End(JsonToken.DOUBLE);

    var code = "{\n\"d\" : 42.42 ,\n\"i\" : 42 ,\n\"s\" : \"quarante-deux\",\n\"s2\":\"a\\\"b\"\n}";

    var lex = builder.Fsm;

    // First run starts at position 0; following runs use the overload without a position.
    var r = lex.Run(code, 0);
    while (r.IsSuccess)
    {
        // The previous version also appended every line to a string that was never read;
        // that dead accumulation has been dropped.
        Console.WriteLine($"{r.Result.TokenID} : {r.Result.Value} @{r.Result.Position}");
        r = lex.Run(code);
    }
}