Beispiel #1
0
        // Runs the single-character pattern "a" through the parser, the Thompson
        // construction and the subset construction, checks that a DFA comes back,
        // and then confirms a lexeme created for that DFA accepts the character 'a'.
        public void RegexToDfaShouldConvertCharacterRegexToDfa()
        {
            var parsedRegex = new RegexParser().Parse("a");
            var thompsonNfa = new ThompsonConstructionAlgorithm().Transform(parsedRegex);
            var subsetDfa = new SubsetConstructionAlgorithm().Transform(thompsonNfa);
            Assert.IsNotNull(subsetDfa);

            var rule = new DfaLexerRule(subsetDfa, "a");
            var scanner = new DfaLexemeFactory().Create(rule);
            var accepted = scanner.Scan('a');
            Assert.IsTrue(accepted);
        }
Beispiel #2
0
        // Hand-builds a two-state DFA whose only transitions are whitespace
        // transitions into (and looping on) the state constructed with `true`,
        // then asserts that every character of a mixed whitespace string scans.
        public void DfaLexemeShouldMatchOneOrMoreRandomWhitespaceCharacters()
        {
            var start = new DfaState();
            var accepting = new DfaState(true);
            start.AddTransition(new DfaTransition(new WhitespaceTerminal(), accepting));
            accepting.AddTransition(new DfaTransition(new WhitespaceTerminal(), accepting));

            var rule = new DfaLexerRule(start, new TokenType("whitespace"));
            var lexeme = new DfaLexeme(rule);

            // Feed the characters in order; each one must be accepted.
            foreach (var whitespaceChar in "\t\f\v \r\n")
            {
                Assert.IsTrue(lexeme.Scan(whitespaceChar));
            }
        }
Beispiel #3
0
        // Builds a DFA that only has 'a'..'z' transitions and checks that scanning
        // the digit '0' is rejected and leaves the lexeme's capture empty.
        public void DfaLexemeGivenCharacerLexemeNumberShouldFail()
        {
            var start = new DfaState();
            var accepting = new DfaState(true);
            start.AddTransition(new DfaTransition(new RangeTerminal('a', 'z'), accepting));
            accepting.AddTransition(new DfaTransition(new RangeTerminal('a', 'z'), accepting));

            var rule = new DfaLexerRule(start, new TokenType("lowerCase"));
            var lexeme = new DfaLexeme(rule);

            var scanned = lexeme.Scan('0');
            Assert.IsFalse(scanned);
            Assert.AreEqual(string.Empty, lexeme.Capture);
        }
Beispiel #4
0
 // Converts the pattern "[-+]?[0-9]" to a DFA, checks that the returned DFA
 // object reports exactly three transitions, and verifies that a lexer rule
 // built from it matches a signed and an unsigned single digit.
 public void RegexToDfaShouldConvertOptionalCharacterClassToDfa()
 {
     var pattern = @"[-+]?[0-9]";
     var parsedRegex = new RegexParser().Parse(pattern);
     var thompsonNfa = new ThompsonConstructionAlgorithm().Transform(parsedRegex);
     var subsetDfa = new SubsetConstructionAlgorithm().Transform(thompsonNfa);

     Assert.IsNotNull(subsetDfa);
     Assert.AreEqual(3, subsetDfa.Transitions.Count);

     var rule = new DfaLexerRule(subsetDfa, pattern);
     foreach (var input in new[] { "+0", "-1", "9" })
     {
         AssertLexerRuleMatches(rule, input);
     }
 }
Beispiel #5
0
        // Builds a DFA whose first transition requires a letter (either case) and
        // whose looping transitions accept letters or digits, then asserts that
        // every character of "t90vAriabl3" is accepted in sequence.
        public void DfaLexemeShouldMatchMixedCaseWord()
        {
            var start = new DfaState();
            var accepting = new DfaState(true);

            // Leading character: lower- or upper-case letter.
            start.AddTransition(new DfaTransition(new RangeTerminal('a', 'z'), accepting));
            start.AddTransition(new DfaTransition(new RangeTerminal('A', 'Z'), accepting));

            // Remaining characters: letters or digits, looping on the same state.
            accepting.AddTransition(new DfaTransition(new RangeTerminal('a', 'z'), accepting));
            accepting.AddTransition(new DfaTransition(new RangeTerminal('A', 'Z'), accepting));
            accepting.AddTransition(new DfaTransition(new DigitTerminal(), accepting));

            var rule = new DfaLexerRule(start, new TokenType("Identifier"));
            var identifierLexeme = new DfaLexeme(rule);

            foreach (var character in "t90vAriabl3")
            {
                Assert.IsTrue(identifierLexeme.Scan(character));
            }
        }
Beispiel #6
0
 /// <summary>
 /// Builds a DFA-backed lexer rule named "whitespace": one whitespace transition
 /// from the start state into the state constructed with <c>true</c>, which then
 /// loops on further whitespace. Both transitions share a single terminal instance.
 /// </summary>
 /// <returns>The newly created <see cref="DfaLexerRule"/>.</returns>
 private static BaseLexerRule CreateWhitespaceLexerRule()
 {
     var terminal = new WhitespaceTerminal();
     var start = new DfaState();
     var accepting = new DfaState(true);

     var enter = new DfaTransition(terminal, accepting);
     start.AddTransition(enter);

     var loop = new DfaTransition(terminal, accepting);
     accepting.AddTransition(loop);

     return new DfaLexerRule(start, "whitespace");
 }
Beispiel #7
0
 /// <summary>
 /// Builds a DFA-backed lexer rule named "identifier" corresponding to the
 /// pattern <c>/[a-zA-Z][a-zA-Z0-9-_]*/</c>.
 /// </summary>
 /// <returns>The newly created <see cref="DfaLexerRule"/>.</returns>
 private static BaseLexerRule CreateIdentifierLexerRule()
 {
     var start = new DfaState();
     var body = new DfaState(true);

     // First character: [a-zA-Z]
     var leadingLetter = new CharacterClassTerminal(
         new RangeTerminal('a', 'z'),
         new RangeTerminal('A', 'Z'));
     start.AddTransition(new DfaTransition(leadingLetter, body));

     // Following characters: [a-zA-Z0-9-_], looping on the same state.
     var trailingCharacter = new CharacterClassTerminal(
         new RangeTerminal('a', 'z'),
         new RangeTerminal('A', 'Z'),
         new DigitTerminal(),
         new SetTerminal('-', '_'));
     body.AddTransition(new DfaTransition(trailingCharacter, body));

     return new DfaLexerRule(start, "identifier");
 }
Beispiel #8
0
 /// <summary>
 /// Creates a lexeme for <paramref name="lexerRule"/> at <paramref name="position"/>.
 /// The capture buffer is obtained by passing the shared <see cref="StringBuilder"/>
 /// pool to <c>ObjectPoolExtensions.Allocate</c>, and the current state is
 /// initialised to the rule's <c>Start</c> state.
 /// </summary>
 /// <param name="lexerRule">Rule whose DFA this lexeme walks; forwarded to the base constructor.</param>
 /// <param name="position">Position forwarded to the base constructor.</param>
 public DfaLexeme(DfaLexerRule lexerRule, int position)
     : base(lexerRule, position)
 {
     var builderPool = SharedPools.Default<StringBuilder>();
     captureBuilder = ObjectPoolExtensions.Allocate(builderPool);
     currentState = lexerRule.Start;
 }
Beispiel #9
0
 /// <summary>
 /// Builds a DFA-backed lexer rule with token type "rule-name": a leading
 /// letter (<c>a-z</c> or <c>A-Z</c>) followed by any number of letters,
 /// digits, <c>'-'</c> or <c>'_'</c>.
 /// </summary>
 /// <returns>The newly created <see cref="DfaLexerRule"/>.</returns>
 private static ILexerRule CreateRuleNameLexerRule()
 {
     var start = new DfaState();
     var body = new DfaState(true);

     // First character must be a letter.
     var leadingLetter = new CharacterClassTerminal(
         new RangeTerminal('a', 'z'),
         new RangeTerminal('A', 'Z'));
     start.AddTransition(new DfaTransition(leadingLetter, body));

     // Subsequent characters loop on the same state.
     var trailingCharacter = new CharacterClassTerminal(
         new RangeTerminal('a', 'z'),
         new RangeTerminal('A', 'Z'),
         new DigitTerminal(),
         new SetTerminal('-', '_'));
     body.AddTransition(new DfaTransition(trailingCharacter, body));

     return new DfaLexerRule(start, new TokenType("rule-name"));
 }
        /// <summary>
        /// Translates one EBNF factor into grammar-model elements, dispatching on
        /// <c>factor.NodeType</c>.
        /// Grouping, optional and repetition factors are delegated to their helper
        /// methods and every production those helpers return is yielded.
        /// Identifier, literal and regex factors append a symbol to
        /// <paramref name="currentProduction"/> via <c>AddWithAnd</c> and yield nothing.
        /// Node types without a matching case produce no output and no changes.
        /// </summary>
        /// <param name="factor">The factor to translate.</param>
        /// <param name="currentProduction">The production that receives appended symbols.</param>
        /// <returns>A lazily evaluated sequence of productions produced by nested factors.</returns>
        IEnumerable<ProductionModel> Factor(EbnfFactor factor, ProductionModel currentProduction)
        {
            switch (factor.NodeType)
            {
                case EbnfNodeType.EbnfFactorGrouping:
                {
                    var groupingFactor = factor as EbnfFactorGrouping;
                    foreach (var generated in Grouping(groupingFactor, currentProduction))
                    {
                        yield return generated;
                    }
                    break;
                }

                case EbnfNodeType.EbnfFactorOptional:
                {
                    var optionalFactor = factor as EbnfFactorOptional;
                    foreach (var generated in Optional(optionalFactor, currentProduction))
                    {
                        yield return generated;
                    }
                    break;
                }

                case EbnfNodeType.EbnfFactorRepetition:
                {
                    var repetitionFactor = factor as EbnfFactorRepetition;
                    foreach (var generated in Repetition(repetitionFactor, currentProduction))
                    {
                        yield return generated;
                    }
                    break;
                }

                case EbnfNodeType.EbnfFactorIdentifier:
                {
                    // Resolve the qualified identifier to a non-terminal and append it.
                    var identifierFactor = factor as EbnfFactorIdentifier;
                    var symbol = GetNonTerminalFromQualifiedIdentifier(identifierFactor.QualifiedIdentifier);
                    currentProduction.AddWithAnd(new NonTerminalModel(symbol));
                    break;
                }

                case EbnfNodeType.EbnfFactorLiteral:
                {
                    // A literal becomes a string-literal lexer rule.
                    var literalFactor = factor as EbnfFactorLiteral;
                    var literalRule = new StringLiteralLexerRule(literalFactor.Value);
                    currentProduction.AddWithAnd(new LexerRuleModel(literalRule));
                    break;
                }

                case EbnfNodeType.EbnfFactorRegex:
                {
                    // Regex -> NFA -> DFA; the rule is labelled with the regex's string form.
                    var regexFactor = factor as EbnfFactorRegex;
                    var automatonNfa = _thompsonConstructionAlgorithm.Transform(regexFactor.Regex);
                    var automatonDfa = _subsetConstructionAlgorithm.Transform(automatonNfa);
                    var regexRule = new DfaLexerRule(automatonDfa, regexFactor.Regex.ToString());
                    currentProduction.AddWithAnd(new LexerRuleModel(regexRule));
                    break;
                }
            }
        }