Пример #1
0
        /// <summary>
        /// Resets the FSM builder and declares the static part of the lexer machine:
        /// builder configuration (<c>IgnoreWS</c>, <c>WhiteSpace</c>, <c>IgnoreEOL</c>),
        /// the identifier sub-machine when identifiers or keywords are requested, and the
        /// integer / double sub-machines when numeric tokens are requested.
        /// The resulting machine is stored in <c>LexerFsm</c>.
        /// </summary>
        /// <param name="idType">Identifier kind forwarded to <c>InitializeIdentifier</c>.</param>
        /// <param name="staticTokens">Generic token kinds the lexer has to support.</param>
        private void InitializeStaticLexer(IdentifierType idType = IdentifierType.Alpha,
                                           params GenericToken[] staticTokens)
        {
            FSMBuilder    = new FSMLexerBuilder <GenericToken>();
            StringCounter = 0;

            // Test membership directly on the array, once per token kind, instead of
            // copying the array into a fresh List for every single check.
            var needsIdentifier = staticTokens.Contains(GenericToken.Identifier) ||
                                  staticTokens.Contains(GenericToken.KeyWord);
            var needsDouble  = staticTokens.Contains(GenericToken.Double);
            var needsNumeric = needsDouble || staticTokens.Contains(GenericToken.Int);

            // conf
            FSMBuilder.IgnoreWS()
            .WhiteSpace(' ')
            .WhiteSpace('\t')
            .IgnoreEOL();

            // start machine definition
            FSMBuilder.Mark(start);

            if (needsIdentifier)
            {
                InitializeIdentifier(idType);
            }

            // numeric: the integer states are declared even when only Double is requested,
            // because the double sub-machine below is chained from the integer end state.
            if (needsNumeric)
            {
                FSMBuilder = FSMBuilder.GoTo(start)
                             .RangeTransition('0', '9')
                             .Mark(in_int)
                             .RangeTransitionTo('0', '9', in_int)
                             .End(GenericToken.Int);
                if (needsDouble)
                {
                    // integer part, then '.', then at least one digit
                    FSMBuilder.Transition('.')
                    .Mark(start_double)
                    .RangeTransition('0', '9')
                    .Mark(in_double)
                    .RangeTransitionTo('0', '9', in_double)
                    .End(GenericToken.Double);
                }
            }

            LexerFsm = FSMBuilder.Fsm;
        }
Пример #2
0
        /// <summary>
        /// Resets the FSM builder, applies the whitespace / end-of-line settings taken from
        /// <paramref name="config"/>, and declares the identifier and numeric sub-machines
        /// required by <paramref name="staticTokens"/>. The resulting machine is stored in
        /// <c>LexerFsm</c>.
        /// </summary>
        /// <param name="config">Source of the <c>IgnoreWS</c>, <c>WhiteSpace</c> and <c>IgnoreEOL</c> settings; also forwarded to <c>InitializeIdentifier</c>.</param>
        /// <param name="staticTokens">Generic token kinds the lexer has to support.</param>
        private void InitializeStaticLexer(Config config, GenericToken[] staticTokens)
        {
            FSMBuilder    = new FSMLexerBuilder <GenericToken>();
            StringCounter = 0;

            // Which sub-machines are required.
            bool wantsIdentifier = staticTokens.Contains(GenericToken.Identifier) ||
                                   staticTokens.Contains(GenericToken.KeyWord);
            bool wantsInt    = staticTokens.Contains(GenericToken.Int);
            bool wantsDouble = staticTokens.Contains(GenericToken.Double);

            // Builder configuration.
            FSMBuilder.IgnoreWS(config.IgnoreWS).WhiteSpace(config.WhiteSpace).IgnoreEOL(config.IgnoreEOL);

            // Entry state of the machine.
            FSMBuilder.Mark(start);

            if (wantsIdentifier)
            {
                InitializeIdentifier(config);
            }

            if (wantsInt || wantsDouble)
            {
                // Integer: one digit, then any number of further digits.
                var afterInt = FSMBuilder.GoTo(start)
                               .RangeTransition('0', '9')
                               .Mark(in_int)
                               .RangeTransitionTo('0', '9', in_int)
                               .End(GenericToken.Int);
                FSMBuilder = afterInt;

                if (wantsDouble)
                {
                    // Double: continues from the integer end state with '.' and at least one digit.
                    FSMBuilder.Transition('.')
                    .Mark(start_double)
                    .RangeTransition('0', '9')
                    .Mark(in_double)
                    .RangeTransitionTo('0', '9', in_double)
                    .End(GenericToken.Double);
                }
            }

            LexerFsm = FSMBuilder.Fsm;
        }
Пример #3
0
        /// <summary>
        /// Declares a character-literal lexeme in the FSM: an opening delimiter, one character
        /// (plain, escaped, or a <c>u</c> escape followed by four characters), and a closing delimiter.
        /// Validation problems are reported through <paramref name="result"/> rather than thrown.
        /// </summary>
        /// <param name="token">Derived token stored in the match properties under <c>DerivedToken</c>.</param>
        /// <param name="result">Build result that collects initialization errors.</param>
        /// <param name="charDelimiter">Delimiter of the char literal; must be exactly one character and not a letter or digit.</param>
        /// <param name="escapeDelimiterChar">Escape character; must be exactly one character and not a letter or digit.</param>
        public void AddCharLexem(IN token, BuildResult <ILexer <IN> > result, string charDelimiter, string escapeDelimiterChar = "\\")
        {
            var delimiterMissing = string.IsNullOrEmpty(charDelimiter);
            if (delimiterMissing || charDelimiter.Length > 1)
            {
                // NOTE(review): this reports the ESCAPE_CHAR error code for a delimiter problem;
                // a delimiter-specific code may be intended -- confirm against ErrorCodes.
                result.AddError(new InitializationError(ErrorLevel.FATAL,
                                                        $"bad lexem {charDelimiter} :  CharToken lexeme delimiter char <{token.ToString()}> must be 1 character length.",
                                                        ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_MUST_BE_1_CHAR));
            }
            else if (char.IsLetterOrDigit(charDelimiter[0]))
            {
                // Reached only when the delimiter is exactly one character long.
                result.AddError(new InitializationError(ErrorLevel.FATAL,
                                                        $"bad lexem {charDelimiter} :  CharToken lexeme delimiter char <{token.ToString()}> can not start with a letter or digit.", ErrorCodes.LEXER_CHAR_DELIMITER_CANNOT_BE_LETTER));
            }

            var escapeMissing = string.IsNullOrEmpty(escapeDelimiterChar);
            if (escapeMissing || escapeDelimiterChar.Length > 1)
            {
                result.AddError(new InitializationError(ErrorLevel.FATAL,
                                                        $"bad lexem {escapeDelimiterChar} :  CharToken lexeme escape char  <{token.ToString()}> must be 1 character length.", ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_MUST_BE_1_CHAR));
            }
            else if (char.IsLetterOrDigit(escapeDelimiterChar[0]))
            {
                // Reached only when the escape string is exactly one character long.
                result.AddError(new InitializationError(ErrorLevel.FATAL,
                                                        $"bad lexem {escapeDelimiterChar} :  CharToken lexeme escape char lexeme <{token.ToString()}> can not start with a letter or digit.", ErrorCodes.LEXER_CHAR_ESCAPE_CHAR_CANNOT_BE_LETTER_OR_DIGIT));
            }

            if (delimiterMissing || escapeMissing)
            {
                // There is no first character to build transitions from. The fatal error has
                // already been recorded, so stop here instead of failing on a null
                // dereference or an out-of-range index below.
                return;
            }

            CharCounter++;

            var charDelimiterChar = charDelimiter[0];

            var escapeChar = escapeDelimiterChar[0];


            // Tags the match with the derived token. The span value is read and written back unchanged.
            NodeCallback <GenericToken> callback = match =>
            {
                match.Properties[DerivedToken] = token;
                var value = match.Result.SpanValue;

                match.Result.SpanValue = value;
                return(match);
            };

            // State names are suffixed with CharCounter so each char lexeme gets its own states.
            FSMBuilder.GoTo(start);
            FSMBuilder.Transition(charDelimiterChar)
            .Mark(start_char + "_" + CharCounter)
            .ExceptTransition(new[] { charDelimiterChar, escapeChar })
            .Mark(in_char + "_" + CharCounter)
            .Transition(charDelimiterChar)
            .Mark(end_char + "_" + CharCounter)
            .End(GenericToken.Char)
            .CallBack(callback)
            .GoTo(start_char + "_" + CharCounter)
            .Transition(escapeChar)
            .Mark(escapeChar_char + "_" + CharCounter)
            .ExceptTransitionTo(new[] { 'u' }, in_char + "_" + CharCounter)
            .CallBack(callback);
            FSMBuilder.Fsm.StringDelimiter = charDelimiterChar;

            // TODO : unicode transitions
            FSMBuilder = FSMBuilder.GoTo(escapeChar_char + "_" + CharCounter)
                         .Transition('u')
                         .Mark(unicode_char + "_" + CharCounter)
                         .RepetitionTransitionTo(in_char + "_" + CharCounter, 4, "[0-9,a-z,A-Z]");
        }
Пример #4
0
        /// <summary>
        /// Manual smoke test: declares a small JSON lexer with <c>FSMLexerBuilder</c>, runs it
        /// over an inline sample document and writes every recognised token to the console
        /// as <c>TokenID : Value @Position</c> until a run no longer succeeds.
        /// </summary>
        private static void testLexerBuilder()
        {
            var builder = new FSMLexerBuilder <JsonToken>();


            // conf
            builder.IgnoreWS()
            .WhiteSpace(' ')
            .WhiteSpace('\t')
            .IgnoreEOL();

            // start machine definition
            builder.Mark("start");


            // string literal; the callback upper-cases the matched value
            builder.Transition('\"')
            .Mark("in_string")
            .ExceptTransitionTo(new[] { '\"', '\\' }, "in_string")
            .Transition('\\')
            .Mark("escape")
            .AnyTransitionTo(' ', "in_string")
            .Transition('\"')
            .End(JsonToken.STRING)
            .Mark("string_end")
            .CallBack(match =>
            {
                match.Result.Value = match.Result.Value.ToUpper();
                return(match);
            });

            // braces
            builder.GoTo("start")
            .Transition('{')
            .End(JsonToken.ACCG);

            builder.GoTo("start")
            .Transition('}')
            .End(JsonToken.ACCD);

            // square brackets
            builder.GoTo("start")
            .Transition('[')
            .End(JsonToken.CROG);

            builder.GoTo("start")
            .Transition(']')
            .End(JsonToken.CROD);

            // colon
            builder.GoTo("start")
            .Transition(':')
            .End(JsonToken.COLON);

            // comma
            builder.GoTo("start")
            .Transition(',')
            .End(JsonToken.COMMA);

            // numeric: digits end an INT; a following '.' plus digits ends a DOUBLE
            builder.GoTo("start")
            .RangeTransition('0', '9')
            .Mark("in_int")
            .RangeTransitionTo('0', '9', "in_int")
            .End(JsonToken.INT)
            .Transition('.')
            .Mark("start_double")
            .RangeTransition('0', '9')
            .Mark("in_double")
            .RangeTransitionTo('0', '9', "in_double")
            .End(JsonToken.DOUBLE);


            var code = "{\n\"d\" : 42.42 ,\n\"i\" : 42 ,\n\"s\" : \"quarante-deux\",\n\"s2\":\"a\\\"b\"\n}";
            var lex  = builder.Fsm;

            // The first run passes an explicit start position of 0; later runs use the overload without one.
            var r = lex.Run(code, 0);

            while (r.IsSuccess)
            {
                Console.WriteLine($"{r.Result.TokenID} : {r.Result.Value} @{r.Result.Position}");
                r = lex.Run(code);
            }
        }
Пример #5
0
        /// <summary>
        /// Declares a character-literal lexeme in the FSM: an opening delimiter, one character
        /// (plain, escaped, or a <c>u</c> escape followed by four characters), and a closing delimiter.
        /// </summary>
        /// <param name="token">Derived token stored in the match properties under <c>DerivedToken</c>.</param>
        /// <param name="charDelimiter">Delimiter of the char literal; exactly one character, neither letter nor digit.</param>
        /// <param name="escapeDelimiterChar">Escape character; exactly one character, neither letter nor digit.</param>
        /// <exception cref="InvalidLexerException">Thrown when either argument fails validation.</exception>
        public void AddCharLexem(IN token, string charDelimiter, string escapeDelimiterChar = "\\")
        {
            // Validate the delimiter: null, empty or longer than one character is rejected first.
            if (charDelimiter == null || charDelimiter.Length != 1)
            {
                throw new InvalidLexerException(
                          $"bad lexem {charDelimiter} :  CharToken lexeme delimiter char <{token.ToString()}> must be 1 character length.");
            }

            var delimiter = charDelimiter[0];
            if (char.IsLetterOrDigit(delimiter))
            {
                throw new InvalidLexerException(
                          $"bad lexem {charDelimiter} :  CharToken lexeme delimiter char <{token.ToString()}> can not start with a letter.");
            }

            // Same two checks for the escape character.
            if (escapeDelimiterChar == null || escapeDelimiterChar.Length != 1)
            {
                throw new InvalidLexerException(
                          $"bad lexem {escapeDelimiterChar} :  CharToken lexeme escape char  <{token.ToString()}> must be 1 character length.");
            }

            var escape = escapeDelimiterChar[0];
            if (char.IsLetterOrDigit(escape))
            {
                throw new InvalidLexerException(
                          $"bad lexem {escapeDelimiterChar} :  CharToken lexeme escape char lexeme <{token.ToString()}> can not start with a letter.");
            }

            CharCounter++;

            // Per-lexeme state names: every char lexeme gets its own numbered set of states.
            var suffix       = "_" + CharCounter;
            var startState   = start_char + suffix;
            var insideState  = in_char + suffix;
            var endState     = end_char + suffix;
            var escapedState = escapeChar_char + suffix;
            var unicodeState = unicode_char + suffix;

            // Tags the match with the derived token. The span value is read and written back unchanged.
            NodeCallback <GenericToken> tagWithToken = m =>
            {
                m.Properties[DerivedToken] = token;
                var span = m.Result.SpanValue;

                m.Result.SpanValue = span;
                return(m);
            };

            // delimiter, one plain character, delimiter -- plus the escaped-character branch
            FSMBuilder.GoTo(start);
            FSMBuilder.Transition(delimiter)
            .Mark(startState)
            .ExceptTransition(new[] { delimiter, escape })
            .Mark(insideState)
            .Transition(delimiter)
            .Mark(endState)
            .End(GenericToken.Char)
            .CallBack(tagWithToken)
            .GoTo(startState)
            .Transition(escape)
            .Mark(escapedState)
            .ExceptTransitionTo(new[] { 'u' }, insideState)
            .CallBack(tagWithToken);
            FSMBuilder.Fsm.StringDelimiter = delimiter;

            // TODO : unicode transitions
            FSMBuilder = FSMBuilder.GoTo(escapedState)
                         .Transition('u')
                         .Mark(unicodeState)
                         .RepetitionTransitionTo(insideState, 4, "[0-9,a-z,A-Z]");
        }