// Verifies that the single-character pattern "a" can be parsed, turned into
// an NFA by Thompson construction and then into a DFA by subset construction,
// and that a lexeme created for the resulting DFA accepts the character 'a'.
public void RegexToDfaShouldConvertCharacterRegexToDfa()
{
    const string singleCharacterPattern = "a";

    var parsedRegex = new RegexParser().Parse(singleCharacterPattern);
    var nfaGraph = new ThompsonConstructionAlgorithm().Transform(parsedRegex);
    var dfaStart = new SubsetConstructionAlgorithm().Transform(nfaGraph);
    Assert.IsNotNull(dfaStart);

    var rule = new DfaLexerRule(dfaStart, "a");
    var scanner = new DfaLexemeFactory().Create(rule);
    var accepted = scanner.Scan('a');
    Assert.IsTrue(accepted);
}
// Builds a two-state DFA by hand (start --whitespace--> accept, with a
// whitespace self-loop on accept) and checks that a lexeme over it accepts
// every character of a mixed whitespace string, one character at a time.
public void DfaLexemeShouldMatchOneOrMoreRandomWhitespaceCharacters()
{
    var whitespaceInput = "\t\f\v \r\n";

    var startState = new DfaState();
    var acceptState = new DfaState(true);
    startState.AddTransition(
        new DfaTransition(new WhitespaceTerminal(), acceptState));
    acceptState.AddTransition(
        new DfaTransition(new WhitespaceTerminal(), acceptState));

    var rule = new DfaLexerRule(startState, new TokenType("whitespace"));
    var lexeme = new DfaLexeme(rule);

    foreach (var character in whitespaceInput)
    {
        Assert.IsTrue(lexeme.Scan(character));
    }
}
// Builds a DFA that only has transitions on 'a'..'z' and checks that scanning
// the digit '0' is rejected and leaves the lexeme's capture empty.
public void DfaLexemeGivenCharacerLexemeNumberShouldFail()
{
    const char digit = '0';

    var startState = new DfaState();
    var acceptState = new DfaState(true);
    startState.AddTransition(
        new DfaTransition(new RangeTerminal('a', 'z'), acceptState));
    acceptState.AddTransition(
        new DfaTransition(new RangeTerminal('a', 'z'), acceptState));

    var rule = new DfaLexerRule(startState, new TokenType("lowerCase"));
    var lexeme = new DfaLexeme(rule);

    var accepted = lexeme.Scan(digit);
    Assert.IsFalse(accepted);
    Assert.AreEqual(string.Empty, lexeme.Capture);
}
// Converts the pattern [-+]?[0-9] to a DFA, checks that the DFA start state
// exposes exactly three transitions, and then checks that a lexer rule built
// on that DFA matches a signed and an unsigned single-digit input.
public void RegexToDfaShouldConvertOptionalCharacterClassToDfa()
{
    const string pattern = @"[-+]?[0-9]";

    var parsedRegex = new RegexParser().Parse(pattern);
    var nfaGraph = new ThompsonConstructionAlgorithm().Transform(parsedRegex);
    var dfaStart = new SubsetConstructionAlgorithm().Transform(nfaGraph);

    Assert.IsNotNull(dfaStart);
    Assert.AreEqual(3, dfaStart.Transitions.Count);

    var rule = new DfaLexerRule(dfaStart, pattern);
    var matchingInputs = new[] { "+0", "-1", "9" };
    foreach (var input in matchingInputs)
    {
        AssertLexerRuleMatches(rule, input);
    }
}
// Builds an identifier-like DFA by hand: the start state moves to the accept
// state on a lower- or upper-case letter, and the accept state loops on
// letters and digits. Every character of "t90vAriabl3" must then be accepted.
public void DfaLexemeShouldMatchMixedCaseWord()
{
    var word = "t90vAriabl3";

    var startState = new DfaState();
    var acceptState = new DfaState(true);

    // First character: a letter of either case.
    startState.AddTransition(
        new DfaTransition(new RangeTerminal('a', 'z'), acceptState));
    startState.AddTransition(
        new DfaTransition(new RangeTerminal('A', 'Z'), acceptState));

    // Remaining characters: letters of either case, or digits.
    acceptState.AddTransition(
        new DfaTransition(new RangeTerminal('a', 'z'), acceptState));
    acceptState.AddTransition(
        new DfaTransition(new RangeTerminal('A', 'Z'), acceptState));
    acceptState.AddTransition(
        new DfaTransition(new DigitTerminal(), acceptState));

    var rule = new DfaLexerRule(startState, new TokenType("Identifier"));
    var identifierLexeme = new DfaLexeme(rule);

    foreach (var character in word)
    {
        Assert.IsTrue(identifierLexeme.Scan(character));
    }
}
/// <summary>
/// Builds the "whitespace" lexer rule: a two-state DFA whose start state moves
/// to a final state on a whitespace terminal, with the final state looping on
/// the same terminal.
/// </summary>
/// <returns>A <see cref="DfaLexerRule"/> rooted at the start state.</returns>
private static BaseLexerRule CreateWhitespaceLexerRule()
{
    var anyWhitespace = new WhitespaceTerminal();
    var entryState = new DfaState();
    var acceptingState = new DfaState(true);

    var firstWhitespace = new DfaTransition(anyWhitespace, acceptingState);
    entryState.AddTransition(firstWhitespace);

    // The same terminal instance is shared by both transitions.
    var moreWhitespace = new DfaTransition(anyWhitespace, acceptingState);
    acceptingState.AddTransition(moreWhitespace);

    return new DfaLexerRule(entryState, "whitespace");
}
/// <summary>
/// Builds the "identifier" lexer rule as a hand-written DFA equivalent to the
/// pattern <c>/[a-zA-Z][a-zA-Z0-9-_]*/</c>: the start state accepts one letter
/// of either case, and the final state loops on letters, digits, '-' and '_'.
/// </summary>
/// <returns>A <see cref="DfaLexerRule"/> rooted at the start state.</returns>
private static BaseLexerRule CreateIdentifierLexerRule()
{
    // The pattern note lives in the summary above rather than in a trailing
    // line comment, so it can never swallow the statements that follow it.
    var identifierState = new DfaState();
    var zeroOrMoreLetterOrDigit = new DfaState(true);

    // First character: [a-zA-Z]
    var leadingCharacter = new CharacterClassTerminal(
        new RangeTerminal('a', 'z'),
        new RangeTerminal('A', 'Z'));
    identifierState.AddTransition(
        new DfaTransition(leadingCharacter, zeroOrMoreLetterOrDigit));

    // Following characters: [a-zA-Z0-9-_]*
    var trailingCharacter = new CharacterClassTerminal(
        new RangeTerminal('a', 'z'),
        new RangeTerminal('A', 'Z'),
        new DigitTerminal(),
        new SetTerminal('-', '_'));
    zeroOrMoreLetterOrDigit.AddTransition(
        new DfaTransition(trailingCharacter, zeroOrMoreLetterOrDigit));

    var identifier = new DfaLexerRule(identifierState, "identifier");
    return identifier;
}
/// <summary>
/// Creates a lexeme for the given DFA lexer rule. The rule and position are
/// forwarded to the base constructor; the capture buffer is taken from the
/// shared <see cref="StringBuilder"/> pool and scanning begins at the rule's
/// start state.
/// </summary>
/// <param name="lexerRule">Rule whose <c>Start</c> state seeds the scan.</param>
/// <param name="position">Position forwarded unchanged to the base constructor.</param>
public DfaLexeme(DfaLexerRule lexerRule, int position)
    : base(lexerRule, position)
{
    // NOTE(review): the builder is pooled; where it is handed back to the
    // pool is not visible from this constructor.
    var builderPool = SharedPools.Default<StringBuilder>();
    captureBuilder = ObjectPoolExtensions.Allocate(builderPool);

    currentState = lexerRule.Start;
}
/// <summary>
/// Builds the "rule-name" lexer rule as a hand-written DFA: the start state
/// accepts one letter of either case, and the final state loops on letters,
/// digits, '-' and '_'.
/// </summary>
/// <returns>
/// A <see cref="DfaLexerRule"/> rooted at the start state and tagged with the
/// token type "rule-name".
/// </returns>
private static ILexerRule CreateRuleNameLexerRule()
{
    var entryState = new DfaState();
    var acceptingState = new DfaState(true);

    // First character: a letter of either case.
    var leadingLetter = new CharacterClassTerminal(
        new RangeTerminal('a', 'z'),
        new RangeTerminal('A', 'Z'));
    entryState.AddTransition(new DfaTransition(leadingLetter, acceptingState));

    // Following characters: letters, digits, '-' or '_'.
    var trailingCharacter = new CharacterClassTerminal(
        new RangeTerminal('a', 'z'),
        new RangeTerminal('A', 'Z'),
        new DigitTerminal(),
        new SetTerminal('-', '_'));
    acceptingState.AddTransition(new DfaTransition(trailingCharacter, acceptingState));

    return new DfaLexerRule(entryState, new TokenType("rule-name"));
}
/// <summary>
/// Translates a single EBNF factor into grammar model content, dispatching on
/// <c>factor.NodeType</c>.
/// <para>
/// Grouping, optional and repetition factors are delegated to their helper
/// methods and every production those helpers produce is yielded. Identifier,
/// literal and regex factors yield nothing; they append a symbol to
/// <paramref name="currentProduction"/> instead. Node types not listed here
/// are ignored.
/// </para>
/// <para>
/// This is an iterator method, so none of the work (including the appends to
/// <paramref name="currentProduction"/>) happens until the returned sequence
/// is enumerated.
/// </para>
/// </summary>
/// <param name="factor">The factor node to translate.</param>
/// <param name="currentProduction">The production being built.</param>
/// <returns>The productions generated by nested factor kinds.</returns>
IEnumerable<ProductionModel> Factor(EbnfFactor factor, ProductionModel currentProduction)
{
    switch (factor.NodeType)
    {
        case EbnfNodeType.EbnfFactorGrouping:
        {
            var groupingFactor = factor as EbnfFactorGrouping;
            foreach (var generated in Grouping(groupingFactor, currentProduction))
            {
                yield return generated;
            }
            break;
        }

        case EbnfNodeType.EbnfFactorOptional:
        {
            var optionalFactor = factor as EbnfFactorOptional;
            foreach (var generated in Optional(optionalFactor, currentProduction))
            {
                yield return generated;
            }
            break;
        }

        case EbnfNodeType.EbnfFactorRepetition:
        {
            var repetitionFactor = factor as EbnfFactorRepetition;
            foreach (var generated in Repetition(repetitionFactor, currentProduction))
            {
                yield return generated;
            }
            break;
        }

        case EbnfNodeType.EbnfFactorIdentifier:
        {
            var identifierFactor = factor as EbnfFactorIdentifier;
            var nonTerminal = GetNonTerminalFromQualifiedIdentifier(
                identifierFactor.QualifiedIdentifier);
            var nonTerminalModel = new NonTerminalModel(nonTerminal);
            currentProduction.AddWithAnd(nonTerminalModel);
            break;
        }

        case EbnfNodeType.EbnfFactorLiteral:
        {
            var literalFactor = factor as EbnfFactorLiteral;
            var literalRule = new StringLiteralLexerRule(literalFactor.Value);
            var literalModel = new LexerRuleModel(literalRule);
            currentProduction.AddWithAnd(literalModel);
            break;
        }

        case EbnfNodeType.EbnfFactorRegex:
        {
            // Regex -> NFA (Thompson) -> DFA (subset construction) -> lexer rule.
            var regexFactor = factor as EbnfFactorRegex;
            var nfaGraph = _thompsonConstructionAlgorithm.Transform(regexFactor.Regex);
            var dfaStart = _subsetConstructionAlgorithm.Transform(nfaGraph);
            var regexRule = new DfaLexerRule(dfaStart, regexFactor.Regex.ToString());
            currentProduction.AddWithAnd(new LexerRuleModel(regexRule));
            break;
        }
    }
}