Beispiel #1
0
        /// <summary>
        /// Recreates <c>FSMBuilder</c>, resets <c>StringCounter</c> and registers the built-in
        /// lexemes requested through <paramref name="staticTokens"/>; the resulting machine is
        /// stored in <c>LexerFsm</c>.
        /// </summary>
        /// <param name="idType">Identifier flavour forwarded to <c>InitializeIdentifier</c>.</param>
        /// <param name="staticTokens">
        /// Generic tokens to support. <c>Identifier</c> or <c>KeyWord</c> enables the identifier
        /// states; <c>Int</c> or <c>Double</c> enables the integer states; <c>Double</c> additionally
        /// enables the fractional part.
        /// </param>
        private void InitializeStaticLexer(IdentifierType idType = IdentifierType.Alpha,
                                           params GenericToken[] staticTokens)
        {
            FSMBuilder    = new FSMLexerBuilder<GenericToken>();
            StringCounter = 0;

            // conf
            FSMBuilder.IgnoreWS()
            .WhiteSpace(' ')
            .WhiteSpace('\t')
            .IgnoreEOL();

            // start machine definition
            FSMBuilder.Mark(start);

            // Query the token array once per kind instead of copying it into a new list
            // for every single membership test.
            var wantsIdentifier = staticTokens.Contains(GenericToken.Identifier) ||
                                  staticTokens.Contains(GenericToken.KeyWord);
            var wantsDouble = staticTokens.Contains(GenericToken.Double);
            var wantsNumeric = wantsDouble || staticTokens.Contains(GenericToken.Int);

            if (wantsIdentifier)
            {
                InitializeIdentifier(idType);
            }

            // numeric
            if (wantsNumeric)
            {
                // One or more digits end as an Int.
                FSMBuilder = FSMBuilder.GoTo(start)
                             .RangeTransition('0', '9')
                             .Mark(in_int)
                             .RangeTransitionTo('0', '9', in_int)
                             .End(GenericToken.Int);

                if (wantsDouble)
                {
                    // Continues from where the integer chain left the builder: a '.' followed
                    // by one or more digits ends as a Double.
                    FSMBuilder.Transition('.')
                    .Mark(start_double)
                    .RangeTransition('0', '9')
                    .Mark(in_double)
                    .RangeTransitionTo('0', '9', in_double)
                    .End(GenericToken.Double);
                }
            }

            LexerFsm = FSMBuilder.Fsm;
        }
Beispiel #2
0
        /// <summary>
        /// Starts a fresh <c>FSMBuilder</c> configured from <paramref name="config"/>, adds the
        /// built-in lexemes listed in <paramref name="staticTokens"/> and stores the built machine
        /// in <c>LexerFsm</c>. <c>StringCounter</c> is reset to zero.
        /// </summary>
        /// <param name="config">
        /// Supplies the whitespace / end-of-line settings and is passed on to
        /// <c>InitializeIdentifier</c>.
        /// </param>
        /// <param name="staticTokens">Generic tokens whose states must be added to the machine.</param>
        private void InitializeStaticLexer(Config config, GenericToken[] staticTokens)
        {
            FSMBuilder    = new FSMLexerBuilder<GenericToken>();
            StringCounter = 0;

            // Lexer-wide settings come straight from the configuration object.
            FSMBuilder.IgnoreWS(config.IgnoreWS).WhiteSpace(config.WhiteSpace).IgnoreEOL(config.IgnoreEOL);

            // Every lexeme chain below is rooted at this node.
            FSMBuilder.Mark(start);

            bool identifierRequested = staticTokens.Contains(GenericToken.Identifier)
                                       || staticTokens.Contains(GenericToken.KeyWord);
            bool doubleRequested  = staticTokens.Contains(GenericToken.Double);
            bool numericRequested = doubleRequested || staticTokens.Contains(GenericToken.Int);

            if (identifierRequested)
            {
                InitializeIdentifier(config);
            }

            if (numericRequested)
            {
                // Integer part: one or more digits.
                FSMBuilder = FSMBuilder.GoTo(start)
                             .RangeTransition('0', '9')
                             .Mark(in_int)
                             .RangeTransitionTo('0', '9', in_int)
                             .End(GenericToken.Int);
            }

            if (doubleRequested)
            {
                // Fractional part, chained directly after the integer states added above:
                // a '.' followed by one or more digits.
                FSMBuilder.Transition('.')
                .Mark(start_double)
                .RangeTransition('0', '9')
                .Mark(in_double)
                .RangeTransitionTo('0', '9', in_double)
                .End(GenericToken.Double);
            }

            LexerFsm = FSMBuilder.Fsm;
        }
Beispiel #3
0
        /// <summary>
        /// Adds the identifier states to <c>FSMBuilder</c> according to <c>config.IdType</c>.
        /// For <c>IdentifierType.Custom</c> the transitions are taken from
        /// <c>config.IdentifierStartPattern</c> and <c>config.IdentifierRestPattern</c>; a pattern
        /// of length 1 is a single character, any other pattern is used as a
        /// <c>[pattern[0], pattern[1]]</c> range. Otherwise a letter-based identifier is built,
        /// extended with digits for <c>AlphaNumeric</c> / <c>AlphaNumericDash</c> and with
        /// <c>'_'</c> and <c>'-'</c> for <c>AlphaNumericDash</c>.
        /// </summary>
        /// <param name="config">Lexer configuration providing the identifier type and patterns.</param>
        /// <remarks>
        /// The sequence of builder calls is kept exactly as is.
        /// NOTE(review): the chains appear to rely on the builder's notion of a "current" node
        /// after each call - confirm against <c>FSMLexerBuilder</c> before reordering anything.
        /// </remarks>
        private void InitializeIdentifier(Config config)
        {
            var idType = config.IdType;

            if (idType != IdentifierType.Custom)
            {
                // Predefined identifier shapes.
                FSMBuilder
                .GoTo(start)
                .RangeTransition('a', 'z')
                .Mark(in_identifier)
                .GoTo(start)
                .RangeTransitionTo('A', 'Z', in_identifier)
                .RangeTransitionTo('a', 'z', in_identifier)
                .RangeTransitionTo('A', 'Z', in_identifier)
                .End(GenericToken.Identifier);

                var acceptsDigits = idType == IdentifierType.AlphaNumeric
                                    || idType == IdentifierType.AlphaNumericDash;
                if (acceptsDigits)
                {
                    FSMBuilder
                    .GoTo(in_identifier)
                    .RangeTransitionTo('0', '9', in_identifier);
                }

                if (idType == IdentifierType.AlphaNumericDash)
                {
                    FSMBuilder
                    .GoTo(start)
                    .TransitionTo('_', in_identifier)
                    .TransitionTo('_', in_identifier)
                    .TransitionTo('-', in_identifier);
                }

                return;
            }

            // Custom identifiers: the first start pattern creates and marks the in_identifier
            // node, every following start pattern only adds a transition towards it.
            var identifierNodeExists = false;
            foreach (var startPattern in config.IdentifierStartPattern)
            {
                FSMBuilder.GoTo(start);

                if (identifierNodeExists)
                {
                    if (startPattern.Length == 1)
                    {
                        FSMBuilder.TransitionTo(startPattern[0], in_identifier);
                    }
                    else
                    {
                        FSMBuilder.RangeTransitionTo(startPattern[0], startPattern[1], in_identifier);
                    }

                    continue;
                }

                if (startPattern.Length == 1)
                {
                    FSMBuilder.Transition(startPattern[0]).Mark(in_identifier).End(GenericToken.Identifier);
                }
                else
                {
                    FSMBuilder.RangeTransition(startPattern[0], startPattern[1]).Mark(in_identifier).End(GenericToken.Identifier);
                }

                identifierNodeExists = true;
            }

            // Characters allowed after the first one all lead to in_identifier.
            foreach (var restPattern in config.IdentifierRestPattern)
            {
                if (restPattern.Length == 1)
                {
                    FSMBuilder.TransitionTo(restPattern[0], in_identifier);
                }
                else
                {
                    FSMBuilder.RangeTransitionTo(restPattern[0], restPattern[1], in_identifier);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Registers a string lexeme for <paramref name="token"/>: increments <c>StringCounter</c>,
        /// records the delimiter and escape characters in <c>StringDelimiterChar</c> /
        /// <c>EscapeStringDelimiterChar</c> and adds the matching states to <c>FSMBuilder</c>.
        /// Matches end as <c>GenericToken.String</c> and carry <paramref name="token"/> in
        /// <c>match.Properties[DerivedToken]</c>.
        /// </summary>
        /// <param name="token">User token attached to every match of this string lexeme.</param>
        /// <param name="stringDelimiter">
        /// Delimiter; must be exactly one character that is neither a letter nor a digit.
        /// </param>
        /// <param name="escapeDelimiterChar">
        /// Escape character; same constraints as the delimiter. When it equals the delimiter,
        /// a doubled delimiter inside the string is collapsed to a single one.
        /// </param>
        /// <exception cref="InvalidLexerException">
        /// Either argument is null, empty, longer than one character, or a letter or digit.
        /// </exception>
        public void AddStringLexem(IN token, string stringDelimiter, string escapeDelimiterChar = "\\")
        {
            if (string.IsNullOrEmpty(stringDelimiter) || stringDelimiter.Length > 1)
            {
                throw new InvalidLexerException($"bad lexem {stringDelimiter} :  StringToken lexeme delimiter char <{token.ToString()}> must be 1 character length.");
            }
            if (char.IsLetterOrDigit(stringDelimiter[0]))
            {
                throw new InvalidLexerException($"bad lexem {stringDelimiter} :  StringToken lexeme delimiter char <{token.ToString()}> can not start with a letter.");
            }

            if (string.IsNullOrEmpty(escapeDelimiterChar) || escapeDelimiterChar.Length > 1)
            {
                throw new InvalidLexerException($"bad lexem {escapeDelimiterChar} :  StringToken lexeme escape char  <{token.ToString()}> must be 1 character length.");
            }
            if (char.IsLetterOrDigit(escapeDelimiterChar[0]))
            {
                throw new InvalidLexerException($"bad lexem {escapeDelimiterChar} :  StringToken lexeme escape char lexeme <{token.ToString()}> can not start with a letter.");
            }

            StringCounter++;

            // Locals are what the lambdas below capture: the instance members are overwritten
            // by every later call, which must not affect the lexeme registered here.
            var delimiter = stringDelimiter[0];
            var escape    = escapeDelimiterChar[0];

            StringDelimiterChar       = delimiter;
            EscapeStringDelimiterChar = escape;

            // Tags the match with the user token. (The former read / write-back of
            // match.Result.Value assigned the value to itself and has been dropped.)
            NodeCallback<GenericToken> callback = (FSMMatch<GenericToken> match) =>
            {
                match.Properties[DerivedToken] = token;
                return match;
            };

            if (delimiter != escape)
            {
                // Distinct escape character.
                FSMBuilder.GoTo(start);
                FSMBuilder.Transition(delimiter)
                .Mark(in_string + StringCounter)
                .ExceptTransitionTo(new char[] { delimiter, escape }, in_string + StringCounter)
                .Transition(escape)
                .Mark(escape_string + StringCounter)
                .AnyTransitionTo(' ', in_string + StringCounter)
                .Transition(delimiter)
                .End(GenericToken.String)
                .Mark(string_end + StringCounter)
                .CallBack(callback);
                FSMBuilder.Fsm.StringDelimiter = delimiter;
            }
            else
            {
                // Delimiter escapes itself by being doubled.
                var doubledDelimiter = new string(delimiter, 2);

                NodeAction collapseDelimiter = (string value) =>
                {
                    // Ordinal: this is a character-level check, not a linguistic comparison.
                    if (value.EndsWith(doubledDelimiter, System.StringComparison.Ordinal))
                    {
                        return value.Substring(0, value.Length - 2) + delimiter;
                    }
                    return value;
                };

                var exceptDelimiter = new char[] { delimiter };

                // Node-name prefixes local to this branch; named so they no longer shadow
                // the in_string member used by the other branch.
                string sameInString = "in_string_same";
                string sameEscaped  = "escaped_same";
                string sameDelim    = "delim_same";

                FSMBuilder.GoTo(start)
                .Transition(delimiter)
                .Mark(sameInString + StringCounter)
                .ExceptTransitionTo(exceptDelimiter, sameInString + StringCounter)
                .Transition(delimiter)

                .Mark(sameEscaped + StringCounter)
                .End(GenericToken.String)
                .CallBack(callback)
                .Transition(delimiter)

                .Mark(sameDelim + StringCounter)
                .Action(collapseDelimiter)
                .ExceptTransitionTo(exceptDelimiter, sameInString + StringCounter);

                FSMBuilder.GoTo(sameDelim + StringCounter)
                .TransitionTo(delimiter, sameEscaped + StringCounter)

                .ExceptTransitionTo(exceptDelimiter, sameInString + StringCounter);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Builds a small JSON lexer by hand with <c>FSMLexerBuilder</c>, runs it over a sample
        /// document and writes every recognised token (id, value, position) to the console.
        /// </summary>
        private static void testLexerBuilder()
        {
            var builder = new FSMLexerBuilder<JsonToken>();

            // conf
            builder.IgnoreWS()
            .WhiteSpace(' ')
            .WhiteSpace('\t')
            .IgnoreEOL();

            // start machine definition
            builder.Mark("start");

            // string literal: '"' ... '"' with '\' as escape character;
            // the callback upper-cases the matched value.
            builder.Transition('\"')
            .Mark("in_string")
            .ExceptTransitionTo(new[] { '\"', '\\' }, "in_string")
            .Transition('\\')
            .Mark("escape")
            .AnyTransitionTo(' ', "in_string")
            .Transition('\"')
            .End(JsonToken.STRING)
            .Mark("string_end")
            .CallBack(match =>
            {
                match.Result.Value = match.Result.Value.ToUpper();
                return match;
            });

            // curly braces
            builder.GoTo("start")
            .Transition('{')
            .End(JsonToken.ACCG);

            builder.GoTo("start")
            .Transition('}')
            .End(JsonToken.ACCD);

            // square brackets
            builder.GoTo("start")
            .Transition('[')
            .End(JsonToken.CROG);

            builder.GoTo("start")
            .Transition(']')
            .End(JsonToken.CROD);

            // colon
            builder.GoTo("start")
            .Transition(':')
            .End(JsonToken.COLON);

            // comma
            builder.GoTo("start")
            .Transition(',')
            .End(JsonToken.COMMA);

            // numeric: digits end as INT; digits '.' digits end as DOUBLE
            builder.GoTo("start")
            .RangeTransition('0', '9')
            .Mark("in_int")
            .RangeTransitionTo('0', '9', "in_int")
            .End(JsonToken.INT)
            .Transition('.')
            .Mark("start_double")
            .RangeTransition('0', '9')
            .Mark("in_double")
            .RangeTransitionTo('0', '9', "in_double")
            .End(JsonToken.DOUBLE);

            var code = "{\n\"d\" : 42.42 ,\n\"i\" : 42 ,\n\"s\" : \"quarante-deux\",\n\"s2\":\"a\\\"b\"\n}";
            var lex  = builder.Fsm;

            // The first call passes an explicit start offset, the following ones do not.
            // The previously accumulated "total" string was never read and has been removed.
            var r = lex.Run(code, 0);
            while (r.IsSuccess)
            {
                Console.WriteLine($"{r.Result.TokenID} : {r.Result.Value} @{r.Result.Position}");
                r = lex.Run(code);
            }
        }