/// <summary>
/// Builds a DFA for this instance's regular expression.
/// </summary>
/// <returns>
/// The result of the DFA builder for the NFA derived from <c>ComputeRx()</c>,
/// using this instance's <c>eof</c> value.
/// </returns>
public Dfa<TLetter> ComputeDfa()
{
    // Expression -> NFA -> DFA. The trailing "true" is forwarded to DfaBuilder.Build unchanged.
    var nfa = NfaBuilder<TLetter>.Build(this.ComputeRx());
    return DfaBuilder<TLetter>.Build(nfa, this.eof, true);
}
/// <summary>
/// Builds a pattern for whole numbers divisible by 3, compiles it to a DFA,
/// and checks both the state count and the recorded DFA output.
/// </summary>
public void Test()
{
    // Digits grouped by their value mod 3.
    var rem0 = CharRange.AnyOf("0369");
    var rem1 = Pattern.Match(CharRange.AnyOf("147")).ThenMaybeRepeat(rem0);
    var rem2 = Pattern.Match(CharRange.AnyOf("258")).ThenMaybeRepeat(rem0);

    // Each sub-expression is built separately (nothing is shared), in the original evaluation order.
    var plusLoop = Pattern.MaybeRepeat(rem1.Then(rem2));
    var plusTail = Pattern.AnyOf(rem1.Then(rem1), rem2);
    var plusTwo = plusLoop.Then(plusTail);

    var minusLoop = Pattern.MaybeRepeat(rem2.Then(rem1));
    var minusTail = Pattern.AnyOf(rem2.Then(rem2), rem1);
    var minusTwo = minusLoop.Then(minusTail);

    var divisibleBy3 = Pattern.MaybeRepeat(Pattern.AnyOf(rem0, rem1.Then(rem2), plusTwo.Then(minusTwo)));

    var dfaBuilder = new DfaBuilder<bool>();
    dfaBuilder.AddPattern(divisibleBy3, true);

    var language = new HashSet<bool> { true };
    var startState = dfaBuilder.Build(language, null);

    Assert.Equal(3, CountStates(startState));
    CheckDfa(startState, "By3Test.out.txt", false);
}
/// <summary>
/// Adds the decimal strings 0..99999 as patterns (each mapped to its value mod 7),
/// builds the DFA, logs the build time, and verifies matching and the state count.
/// </summary>
public void TestTo100K()
{
    // Single source of truth for the number of patterns; also used in the log message.
    const int numberCount = 100000;

    var builder = new DfaBuilder<int>();
    for (var i = 0; i < numberCount; ++i)
    {
        builder.AddPattern(Pattern.Match(i.ToString()), i % 7);
    }

    // Only Build + CountStates are timed.
    var stopWatch = Stopwatch.StartNew();
    var start = builder.Build(null);
    var numstates = CountStates(start);
    stopWatch.Stop();

    var telapsed = stopWatch.ElapsedMilliseconds;
    helper.WriteLine($"Minimized {numberCount} numbers -> value mod 7 (down to {numstates} states) in {telapsed * .001} seconds");

    // Neither the empty string nor a number outside the range may match.
    Assert.False(StringMatcher<int>.MatchWholeString(start, "", out _));
    Assert.False(StringMatcher<int>.MatchWholeString(start, "100001", out _));

    for (var i = 0; i < numberCount; ++i)
    {
        Assert.True(StringMatcher<int>.MatchWholeString(start, i.ToString(), out var result));
        Assert.Equal(i % 7, result);
    }

    Assert.Equal(36, numstates);
}
/// <summary>
/// Compiles the class-level <c>Pattern</c> regex into a boolean-result DFA.
/// </summary>
/// <returns>The start state returned by the builder.</returns>
private static DfaState<bool> CreateMatcher()
{
    var matcherBuilder = new DfaBuilder<bool>();

    // Fully qualified because "Pattern" alone refers to the member holding the regex text.
    var compiledPattern = DfaLex.Pattern.Regex(Pattern);
    matcherBuilder.AddPattern(compiledPattern, true);

    var startState = matcherBuilder.Build(null);
    return startState;
}
/// <summary>
/// Measures how many full scans of <paramref name="src"/> with the DFA for
/// <paramref name="patString"/> complete in the 1000 ms window that follows
/// the first <c>SpinUp</c> milliseconds.
/// </summary>
/// <param name="src">Text to scan; it is expected to contain no match.</param>
/// <param name="patString">Regular expression compiled into the DFA.</param>
/// <returns>The number of scans started at or after <c>SpinUp</c> ms elapsed.</returns>
/// <exception cref="InvalidOperationException">A match was found in <paramref name="src"/>.</exception>
private int TimeMatcher(string src, string patString)
{
    DfaState<bool> startState;
    {
        var builder = new DfaBuilder<bool>();
        builder.AddPattern(Pattern.Regex(patString), true);
        startState = builder.Build(null);
    }

    var count = 0;
    var stopWatch = Stopwatch.StartNew();
    for (var t = stopWatch.ElapsedMilliseconds; t < SpinUp + 1000; t = stopWatch.ElapsedMilliseconds)
    {
        var m = new StringMatcher<bool>(src);
        if (m.FindNext(startState, out _))
        {
            // A specific exception type instead of bare System.Exception (CA2201).
            throw new InvalidOperationException("not supposed to find a match");
        }

        // Iterations during the spin-up period run but are not counted.
        if (t >= SpinUp)
        {
            ++count;
        }
    }

    return count;
}
/// <summary>
/// Exercises <c>StringMatcher</c> with two overlapping patterns: successive
/// <c>FindNext</c> calls, the reported match positions, and <c>SetPositions</c>.
/// </summary>
public void TestStringMatcher()
{
    var dfaBuilder = new DfaBuilder<int>();
    dfaBuilder.AddPattern(Pattern.Regex("a[ab]*b"), 1);
    dfaBuilder.AddPattern(Pattern.Regex("a[ab]*c"), 2);
    DfaState<int> startState = dfaBuilder.Build(null);

    var scanner = new StringMatcher<int>("bbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbcaaaaaaabbbaaaaaaa");

    // First match: result 2.
    Assert.True(scanner.FindNext(startState, out var token));
    Assert.Equal(2, token);
    Assert.Equal("aaaaaaaaaaaaaaaaaaaaaaaabbbbc", scanner.LastMatch);
    Assert.Equal(5, scanner.LastMatchStart);
    Assert.Equal(34, scanner.LastMatchEnd);

    // Second match: result 1.
    Assert.True(scanner.FindNext(startState, out token));
    Assert.Equal(1, token);
    Assert.Equal("aaaaaaabbb", scanner.LastMatch);

    // Nothing further; the out value is expected to be 0.
    Assert.False(scanner.FindNext(startState, out token));
    Assert.Equal(0, token);

    // Reposition the matcher and continue from there.
    scanner.SetPositions(15, 20, 33);
    Assert.Equal("aaaaa", scanner.LastMatch);

    scanner.FindNext(startState, out token);
    Assert.Equal("aaaaaaaaabbbb", scanner.LastMatch);

    Assert.False(scanner.FindNext(startState, out token));
}
/// <summary>
/// Builds the automaton for the pattern "BB][CC" and asserts that
/// <c>States[2].Starts</c> is true.
/// </summary>
public void Dfab_StartsWith()
{
    var automatonBuilder = new DfaBuilder("BB][CC");
    automatonBuilder.ExtendWithEos();
    automatonBuilder.BuildNodeSets();
    automatonBuilder.BuildAutomaton();

    // The argument stays inline so it is only evaluated when Debug.WriteLine is compiled in.
    Debug.WriteLine(automatonBuilder.ParseTree.ToString());

    var thirdState = automatonBuilder.States[2];
    Assert.IsTrue(thirdState.Starts);
}
/// <summary>
/// Builds a DFA over <c>LetterId</c> for this instance's regular expression,
/// and returns the alphabet builder used for it.
/// </summary>
/// <param name="alphabet">Receives the alphabet builder created from the expression, this instance's <c>eof</c> and <paramref name="validRanges"/>.</param>
/// <param name="validRanges">Passed through to the alphabet builder; defaults to <c>default</c>.</param>
/// <returns>The DFA built from the NFA of <c>alphabet.Expression</c>, with <c>LetterId.Eof</c> as the EOF letter.</returns>
public Dfa<LetterId> ComputeDfa(out AlphabetBuilder<TLetter> alphabet, RangeSet<TLetter> validRanges = default)
{
    alphabet = new AlphabetBuilder<TLetter>(this.ComputeRx(), this.eof, validRanges);

    var letterNfa = NfaBuilder<LetterId>.Build(alphabet.Expression);
    return DfaBuilder<LetterId>.Build(letterNfa, LetterId.Eof, true);
}
/// <summary>
/// Builds the automaton for the pattern "af" and checks that it has two states
/// and that the transition table entry [0, 0xAF] is 1.
/// </summary>
public void Dfab_Simple()
{
    var automatonBuilder = new DfaBuilder("af");
    automatonBuilder.ExtendWithEos();
    automatonBuilder.BuildNodeSets();
    automatonBuilder.BuildAutomaton();

    var stateCount = automatonBuilder.States.Length;
    Assert.AreEqual(2, stateCount, "Should have 2 states.");

    var target = automatonBuilder.Transitions[0, 0xAF];
    Assert.AreEqual(1, target);
}
/// <summary>
/// Compiles <paramref name="p"/> into a boolean-result DFA and returns its
/// <c>PrettyPrinter</c> rendering.
/// </summary>
/// <param name="p">The pattern to compile.</param>
/// <returns>The text produced by <c>PrettyPrinter.Print</c> for the built DFA.</returns>
private string PToString(IMatchable p)
{
    var printerBuilder = new DfaBuilder<bool>();
    printerBuilder.AddPattern(p, true);
    return PrettyPrinter.Print(printerBuilder.Build(null));
}
/// <summary>
/// Builds a DFA with <c>int</c> results for "ab" (0) and "bb" (1) and compares
/// it against the recorded output "StructTests-2.txt".
/// </summary>
public void DfaWithIntResultsTest()
{
    var intBuilder = new DfaBuilder<int>();

    var abPattern = Pattern.Regex("ab");
    intBuilder.AddPattern(abPattern, 0);

    var bbPattern = Pattern.Regex("bb");
    intBuilder.AddPattern(bbPattern, 1);

    var startState = intBuilder.Build(null);
    CheckDfa(startState, "StructTests-2.txt", false);
}
/// <summary>
/// Builds a DFA with enum results (keyword "if" vs. identifier), resolving
/// ambiguous accepts by taking the first candidate, and compares it against
/// the recorded output "StructTests-1.txt".
/// </summary>
public void DfaWithEnumResultsTest()
{
    var enumBuilder = new DfaBuilder<EnumToken>();

    var keyword = Pattern.Match("if");
    enumBuilder.AddPattern(keyword, EnumToken.If);

    var identifier = Pattern.Regex("([A-Za-z])([A-Za-z0-9])*");
    enumBuilder.AddPattern(identifier, EnumToken.Id);

    var startState = enumBuilder.Build(candidates => candidates.First());
    CheckDfa(startState, "StructTests-1.txt", false);
}
/// <summary>
/// Builds the automaton for the pattern "??", writes its parse tree to the debug
/// output, and checks the state count and the transition table entry [0, 0xAF].
/// </summary>
public void Dfab_MatchAny()
{
    var automatonBuilder = new DfaBuilder("??");
    automatonBuilder.ExtendWithEos();
    automatonBuilder.BuildNodeSets();
    automatonBuilder.BuildAutomaton();

    // The argument stays inline so it is only evaluated when Debug.WriteLine is compiled in.
    Debug.WriteLine(automatonBuilder.ParseTree.ToString());

    var stateCount = automatonBuilder.States.Length;
    Assert.AreEqual(2, stateCount, "Should have 2 states");

    var target = automatonBuilder.Transitions[0, 0xAF];
    Assert.AreEqual(1, target);
}
/// <summary>
/// Builds the automaton for the pattern "BB][CC" and asserts that
/// <c>States[2].Starts</c> is true.
/// </summary>
public void Dfab_StartsWith()
{
    var automatonBuilder = new DfaBuilder("BB][CC");
    automatonBuilder.ExtendWithEos();
    automatonBuilder.BuildNodeSets();
    automatonBuilder.BuildAutomaton();

    // Parse-tree debug output is commented out in this variant:
    //Debug.WriteLine(dfab.ParseTree.ToString());

    var thirdState = automatonBuilder.States[2];
    Assert.IsTrue(thirdState.Starts);
}
/// <summary>
/// Creates a state from an existing one: copies its builder, nodes, lazy
/// transitions and hash code, takes the success sub-graph from the source's
/// previous success transition (if any), and marks the new state as <c>Break</c>.
/// </summary>
/// <param name="state">The state to copy from.</param>
protected DfaState(TDfaState state)
{
    // Values copied verbatim from the source state.
    Builder = state.Builder;
    Nodes = state.Nodes;
    LazyTransitions = state.LazyTransitions;
    HashCode = state.HashCode;

    // Null when the source has no previous success transition.
    var previousSuccess = state.PrevSuccessTransition;
    SuccessSubGraph = previousSuccess?.SubGraph;

    Break = true;
}
/// <summary>
/// Builds the automaton for the pattern "??" and checks the state count and
/// the transition table entry [0, 0xAF].
/// </summary>
public void Dfab_MatchAny()
{
    var automatonBuilder = new DfaBuilder("??");
    automatonBuilder.ExtendWithEos();
    automatonBuilder.BuildNodeSets();
    automatonBuilder.BuildAutomaton();

    var stateCount = automatonBuilder.States.Length;
    Assert.AreEqual(2, stateCount, "Should have 2 states");

    var target = automatonBuilder.Transitions[0, 0xAF];
    Assert.AreEqual(1, target);
}
/// <summary>
/// Builds the lexer state machine and the LALR parse table for the charset
/// expression grammar.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The generated DFA's start state does not have the default id.
/// </exception>
private CharsetGrammar()
{
    var charSets = new UnicodeCharSetProvider();
    var utf16 = new UnicodeUtf16Mapper(false, false);
    var lexerRx = RegexLexer.CreateRx(utf16);

    // Token expressions taken from the regex lexer's own definitions.
    var whitespaceToken = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymWhitespace), SymWhitespace, 0);
    var charsetToken = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymCharset), SymCharset, 0);
    var regexCharsetToken = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymRegexCharset), SymRegexCharset, 0);

    // Operator and parenthesis tokens defined directly by their characters.
    var unionToken = new RxAccept<char>(RegexMatchSet.FromChars('|', '+').ToInvariant(utf16, charSets, true), SymUnion, 0);
    var subtractToken = new RxAccept<char>(RegexMatchSet.FromChars('-').ToInvariant(utf16, charSets, true), SymSubtract, 0);
    var intersectToken = new RxAccept<char>(RegexMatchSet.FromChars('&').ToInvariant(utf16, charSets, true), SymIntersect, 0);
    var differenceToken = new RxAccept<char>(RegexMatchSet.FromChars('^').ToInvariant(utf16, charSets, true), SymDifference, 0);
    var negateToken = new RxAccept<char>(RegexMatchSet.FromChars('~').ToInvariant(utf16, charSets, true), SymNegate, 0);
    var parensOpenToken = new RxAccept<char>(RegexMatchSet.FromChars('(').ToInvariant(utf16, charSets, true), SymParensOpen, 0);
    var parensCloseToken = new RxAccept<char>(RegexMatchSet.FromChars(')').ToInvariant(utf16, charSets, true), SymParensClose, 0);

    // Right-nested alternation of all tokens, assembled innermost-first
    // (the same construction order as a single nested expression).
    var allTokens = new RxAlternation<char>(parensOpenToken, parensCloseToken);
    allTokens = new RxAlternation<char>(negateToken, allTokens);
    allTokens = new RxAlternation<char>(differenceToken, allTokens);
    allTokens = new RxAlternation<char>(intersectToken, allTokens);
    allTokens = new RxAlternation<char>(subtractToken, allTokens);
    allTokens = new RxAlternation<char>(unionToken, allTokens);
    allTokens = new RxAlternation<char>(regexCharsetToken, allTokens);
    allTokens = new RxAlternation<char>(charsetToken, allTokens);
    allTokens = new RxAlternation<char>(whitespaceToken, allTokens);

    var alphabet = new AlphabetBuilder<char>(allTokens, Utf16Chars.EOF, Utf16Chars.ValidBmp);
    var letterNfa = NfaBuilder<LetterId>.Build(alphabet.Expression);
    var letterDfa = DfaBuilder<LetterId>.Build(letterNfa, LetterId.Eof);

    if (letterDfa.StartState.Id != default(Id<DfaState<LetterId>>))
    {
        throw new InvalidOperationException($"Internal error: Unexpected DFA start state {letterDfa.StartState.Id}");
    }

    var alphabetMapper = AlphabetMapperEmitter<char>.CreateExpression(alphabet);
    var machineExpression = DfaStateMachineEmitter.CreateExpression(letterDfa, alphabetMapper);
    this.stateMachine = machineExpression.Compile();

    // Grammar rows: the first symbol of each row is followed by the remaining symbols of that rule.
    var grammar = new GrammarBuilder(-2, -1, SymExpression)
    {
        { SymUnionExpression, SymExpression, SymUnion, SymNegateExpression },
        { SymExpression, SymUnionExpression },
        { SymSubtractExpression, SymExpression, SymSubtract, SymNegateExpression },
        { SymExpression, SymSubtractExpression },
        { SymIntersectExpression, SymExpression, SymIntersect, SymNegateExpression },
        { SymExpression, SymIntersectExpression },
        { SymDifferenceExpression, SymExpression, SymDifference, SymNegateExpression },
        { SymExpression, SymDifferenceExpression },
        { SymExpression, SymNegateExpression },
        { SymNegateExpression, SymNegate, SymValueExpression },
        { SymNegateExpression, SymValueExpression },
        { SymValueExpression, SymParensOpen, SymExpression, SymParensClose },
        { SymValueExpression, SymCharset },
        { SymValueExpression, SymRegexCharset }
    };
    this.table = new LalrTableGenerator(grammar).ComputeTable();
}
/// <summary>
/// Builds a string-result DFA for the keyword "if" and identifiers, taking the
/// first candidate on ambiguous accepts, and prints it via <c>PrintDot</c>.
/// </summary>
public void Test()
{
    var keywordPattern = Pattern.Regex("if");
    var identifierPattern = Pattern.Regex("[a-zA-Z][a-zA-Z0-9]*");

    var dfaBuilder = new DfaBuilder<string>();
    dfaBuilder.AddPattern(keywordPattern, "if");
    dfaBuilder.AddPattern(identifierPattern, "id");

    var language = new HashSet<string>(new[] { "if", "id" });
    var startState = dfaBuilder.Build(language, candidates => candidates.First());

    PrintDot(startState);
}
/// <summary>
/// Adds every <c>JavaToken</c> pattern to <paramref name="builder"/>, builds the
/// DFA for the full token set, and compares it with "JavaTest.out.txt".
/// </summary>
/// <param name="builder">The builder to populate and build with.</param>
private void Build(DfaBuilder<JavaToken> builder)
{
    var tokenValues = Enum.GetValues(typeof(JavaToken));

    foreach (JavaToken token in tokenValues)
    {
        builder.AddPattern(token.Pattern(), token);
    }

    var language = new HashSet<JavaToken>(tokenValues.Cast<JavaToken>());
    var startState = builder.Build(language, null);

    CheckDfa(startState, "JavaTest.out.txt", false);
}
/// <summary>
/// Builds the automaton for the pattern "CC+" and checks that it has two states
/// and that 0xCC leads to state 1 from both state 0 and state 1.
/// </summary>
public void Dfab_Plus()
{
    var automatonBuilder = new DfaBuilder("CC+");
    automatonBuilder.ExtendWithEos();
    automatonBuilder.BuildNodeSets();
    automatonBuilder.BuildAutomaton();

    // The argument stays inline so it is only evaluated when Debug.WriteLine is compiled in.
    Debug.WriteLine(automatonBuilder.ParseTree.ToString());

    var stateCount = automatonBuilder.States.Length;
    Assert.AreEqual(2, stateCount, "Should have 2 states.");

    var transitions = automatonBuilder.Transitions;
    Assert.AreEqual(1, transitions[0, 0xCC]);
    Assert.AreEqual(1, transitions[1, 0xCC]);
}
/// <summary>
/// Builds the automaton for the pattern "CC+55" and checks that it has three
/// states with the expected 0xCC and 0x55 transitions.
/// </summary>
public void Dfab_PlusCat()
{
    var automatonBuilder = new DfaBuilder("CC+55");
    automatonBuilder.ExtendWithEos();
    automatonBuilder.BuildNodeSets();
    automatonBuilder.BuildAutomaton();

    // The argument stays inline so it is only evaluated when Debug.WriteLine is compiled in.
    Debug.WriteLine(automatonBuilder.ParseTree.ToString());

    var stateCount = automatonBuilder.States.Length;
    Assert.AreEqual(3, stateCount, "Should have 3 states.");

    var transitions = automatonBuilder.Transitions;
    Assert.AreEqual(1, transitions[0, 0xCC]);
    Assert.AreEqual(1, transitions[1, 0xCC]);
    Assert.AreEqual(2, transitions[1, 0x55]);
}
/// <summary>
/// Builds the DFA for all <c>JavaToken</c> patterns and compares it with the
/// recorded output "JavaTest.out.txt".
/// </summary>
public void Test()
{
    var tokenValues = Enum.GetValues(typeof(JavaToken));
    var tokenBuilder = new DfaBuilder<JavaToken>();

    foreach (JavaToken token in tokenValues)
    {
        tokenBuilder.AddPattern(token.Pattern(), token);
    }

    var language = new HashSet<JavaToken>(tokenValues.Cast<JavaToken>());
    var startState = tokenBuilder.Build(language, null);

    CheckDfa(startState, "JavaTest.out.txt", false);
}
/// <summary>
/// Checks that a pattern containing a combining accent (U+0301) is found and
/// replaced by the string searcher.
/// </summary>
public void TestSeparateAccentedCharacters()
{
    var accentBuilder = new DfaBuilder<AccentedChar>();
    accentBuilder.AddPattern(Pattern.Match("Les Mise\u0301rables"), AccentedChar.Miserable);
    var stringSearcher = accentBuilder.BuildStringSearcher(null);

    var input = "Les Mise\u0301rables";
    var expected = "[Miserable=Les Mise\u0301rables]";

    var actual = stringSearcher.FindAndReplace(input, TokenReplace);

    Assert.Equal(expected, actual);
}
// Creates a DFA state from its nodes and transitions and derives the
// SavePointer / Break / Continue flags and the optional predicate lookup tables.
//
// nodes                 - nodes making up this state; stored in Nodes.
// lazyTransitions       - stored in LazyTransitions.
// successTransition     - stored in SuccessTransition; its SubGraph (if any) becomes SuccessSubGraph.
// prevSuccessTransition - stored in PrevSuccessTransition.
// hashCode              - stored in HashCode.
// builder               - owning builder; this instance is registered with it and it
//                         decides (FastLookup) whether lookup arrays are allocated.
protected DfaState(DfaNode[] nodes, DfaTransition[] lazyTransitions, DfaTransition successTransition, DfaTransition prevSuccessTransition, int hashCode, DfaBuilder <TDfaState> builder)
{
    // Registration happens first, before any other member of this instance is assigned;
    // the value returned by the builder becomes this state's Index.
    Index = builder.Register((TDfaState)this);
    // ArrayLimit-sized lookup array only when the builder asks for fast lookup; otherwise an empty array.
    Array = builder.FastLookup ? new TDfaState[ArrayLimit] : System.Array.Empty <TDfaState>();
    Dictionary = new DfaDictionary <TDfaState>();
    Builder = builder;
    Nodes = nodes;
    LazyTransitions = lazyTransitions;
    HashCode = hashCode;
    SuccessTransition = successTransition;
    PrevSuccessTransition = prevSuccessTransition;
    // Null when there is no success transition.
    SuccessSubGraph = SuccessTransition?.SubGraph;
    // SavePointer is set exactly when a success sub-graph is present.
    if (SuccessSubGraph != null) { SavePointer = true; }
    // Break is set only for a single-node state with a success transition whose node
    // is a ReturnStateNode (or has an invalid ReturnPath) and has no execution paths.
    if (nodes.Length == 1 && SuccessTransition != null)
    {
        var dfaNode = Nodes[0];
        var node = (Node)dfaNode.Node;
        if ((node is ReturnStateNode || node.ReturnPath.IsInvalid) && node.ExecutionPaths.Length == 0) { Break = true; }
    }
    // The predicate is the ExecutionPathObject of the first node that has one (null if none does).
    Predicate = Nodes.FirstOrDefault(a => a.ExecutionPathObject != null)?.ExecutionPathObject;
    // Predicate lookup tables are only created when a predicate exists; they follow the
    // same FastLookup rule as Array above.
    if (Predicate != null)
    {
        TruePredicateArray = builder.FastLookup ? new TDfaState[ArrayLimit] : System.Array.Empty <TDfaState>();
        TruePredicateDictionary = new Dictionary <int, TDfaState>();
        FalsePredicateArray = builder.FastLookup ? new TDfaState[ArrayLimit] : System.Array.Empty <TDfaState>();
        FalsePredicateDictionary = new Dictionary <int, TDfaState>();
    }
    // NOTE(review): the suppression below indicates CreateNullState is virtual, so an
    // override runs here before the derived constructor body — confirm overrides only
    // rely on members assigned above.
    // ReSharper disable once VirtualMemberCallInConstructor
    NullState = CreateNullState();
    // Continue is set only when none of SavePointer, Break or Predicate applies.
    if (SavePointer == false && Break == false && Predicate == null) { Continue = true; }
}
/// <summary>
/// Checks that the string searcher built from all <c>JavaToken</c> patterns
/// replaces the input "this" with "[THIS=this]".
/// </summary>
public void TestSingleToken()
{
    var tokenBuilder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        tokenBuilder.AddPattern(token.Pattern(), token);
    }

    var stringSearcher = tokenBuilder.BuildStringSearcher(null);

    var input = "this";
    var expected = "[THIS=this]";
    var actual = stringSearcher.FindAndReplace(input, TokenReplace);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Runs the string searcher built from all <c>JavaToken</c> patterns over
/// "SearcherTestInput.txt" and compares the result with "SearcherTestOutput.txt".
/// </summary>
public void Test()
{
    var tokenBuilder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        tokenBuilder.AddPattern(token.Pattern(), token);
    }

    var stringSearcher = tokenBuilder.BuildStringSearcher(null);

    var input = ReadResource("SearcherTestInput.txt");
    var expected = ReadResource("SearcherTestOutput.txt");
    var actual = stringSearcher.FindAndReplace(input, TokenReplace);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Builds a minimal acyclic DFA from a small word list and checks that it
/// recognizes exactly those words among the probed strings.
/// </summary>
void MinAcDfaConstructionTest()
{
    var dfa = DfaBuilder.ConstructMinAcyclicDFA(new[] { "appl", "bapp", "cppe", "cppee", "x" });

    // Every input word must be recognized.
    foreach (var word in new[] { "appl", "bapp", "cppe", "cppee", "x" })
    {
        Assert.True(dfa.Recognize(word));
    }

    // Strings that are not in the word list must be rejected.
    foreach (var nonWord in new[] { "e", "ex", "cpp" })
    {
        Assert.False(dfa.Recognize(nonWord));
    }
}
/// <summary>
/// Parses <paramref name="pattern"/> and builds an <c>Automaton</c> from it.
/// </summary>
/// <param name="pattern">The pattern text to parse.</param>
/// <returns>
/// The automaton built from the pattern, or <c>null</c> if any step throws.
/// </returns>
public static Automaton CreateFromPattern(string pattern)
{
    try
    {
        var syntaxTree = new PatternParser(pattern).Parse();

        var dfaBuilder = new DfaBuilder(syntaxTree);
        dfaBuilder.ExtendWithEos();
        dfaBuilder.BuildNodeSets();
        dfaBuilder.BuildAutomaton(syntaxTree);

        var states = dfaBuilder.States;
        var transitions = dfaBuilder.Transitions;
        return new Automaton(states, transitions);
    }
    catch
    {
        // Best-effort: every failure is reported to the caller as null.
        return null;
    }
}
/// <summary>
/// Checks that <c>BuildReverseFinder</c> produces the same DFA as one built by hand
/// from "any string followed by the reversed token", including when some token
/// patterns are made optional.
/// </summary>
public void Test()
{
    // Reference DFA: AllStrings followed by each reversed token pattern.
    var referenceBuilder = new DfaBuilder<bool>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        var reversed = token.Pattern().Reversed;
        referenceBuilder.AddPattern(Pattern.AllStrings.Then(reversed), true);
    }

    var expected = _toString(referenceBuilder.Build(null));

    var tokenBuilder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        tokenBuilder.AddPattern(token.Pattern(), token);
    }

    var actual = _toString(tokenBuilder.BuildReverseFinder());
    Assert.Equal(expected, actual);

    // Make sure the empty string is properly excluded from the reverse finder DFA:
    // tokens with an odd numeric value are added as optional patterns.
    tokenBuilder.Clear();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        if (((int)token & 1) != 0)
        {
            tokenBuilder.AddPattern(Pattern.Maybe(token.Pattern()), token);
        }
        else
        {
            tokenBuilder.AddPattern(token.Pattern(), token);
        }
    }

    actual = _toString(tokenBuilder.BuildReverseFinder());
    Assert.Equal(expected, actual);
}
/// <summary>
/// Checks the builder cache: the first build stores one entry with no hits, a
/// rebuild after <c>Clear</c> hits the cache, and a new builder sharing the
/// cache hits it again.
/// </summary>
public void Test()
{
    var sharedCache = new InMemoryBuilderCache();

    // First build: one entry stored, nothing served from the cache.
    var firstBuilder = new DfaBuilder<JavaToken>(sharedCache);
    Build(firstBuilder);
    Assert.Equal(1, sharedCache.Cache.Count);
    Assert.Equal(0, sharedCache.Hits);

    // Same builder after Clear(): first hit.
    firstBuilder.Clear();
    Build(firstBuilder);
    Assert.Equal(1, sharedCache.Cache.Count);
    Assert.Equal(1, sharedCache.Hits);

    // New builder on the same cache: second hit.
    var secondBuilder = new DfaBuilder<JavaToken>(sharedCache);
    Build(secondBuilder);
    Assert.Equal(1, sharedCache.Cache.Count);
    Assert.Equal(2, sharedCache.Hits);
}
/// <summary>
/// Compares the destinies reported by <c>DfaAuxiliaryInformation.GetDestinies</c>
/// with destinies computed by a straightforward fixed-point iteration.
/// </summary>
public void Test()
{
    var builder = new DfaBuilder<JavaToken?>(null);
    foreach (JavaToken tok in Enum.GetValues(typeof(JavaToken)))
    {
        builder.AddPattern(tok.Pattern(), tok);
    }

    var language = new HashSet<JavaToken?>(Enum.GetValues(typeof(JavaToken)).Cast<JavaToken?>());
    var start = builder.Build(language, null);
    var auxInfo = new DfaAuxiliaryInformation<JavaToken?>(new[] { start });

    // Calculate destinies the slow way: start with each accepting state's own match...
    var states = auxInfo.GetStatesByNumber();
    var numStates = states.Count;
    var slowDestinies = new List<ISet<JavaToken?>>(numStates);
    for (var i = 0; i < numStates; i++)
    {
        slowDestinies.Add(new HashSet<JavaToken?>());
        var state = states[i];
        if (state.IsAccepting)
        {
            slowDestinies[i].Add(state.Match);
        }
    }

    // ...then pull in the tokens of every transition target until nothing changes.
    var again = true;
    while (again)
    {
        again = false;
        for (var i = 0; i < numStates; ++i)
        {
            var set = slowDestinies[i];
            var state = states[i];
            state.EnumerateTransitions((f, l, target) =>
            {
                var targetSet = slowDestinies[target.StateNumber];

                // Another pass is needed when ANY token was newly added. The previous
                // logic required ALL tokens to be new, which could stop before the fixed
                // point and also requested another pass for an empty target set.
                var changed = false;
                foreach (var token in targetSet)
                {
                    if (set.Add(token))
                    {
                        changed = true;
                    }
                }

                if (changed)
                {
                    again = true;
                }
            });
        }
    }

    var destinies = auxInfo.GetDestinies();
    for (var i = 0; i < numStates; ++i)
    {
        var set = slowDestinies[i];

        // A state has a destiny only when exactly one token is reachable from it.
        JavaToken? wantDestiny = null;
        if (set.Count == 1)
        {
            wantDestiny = set.FirstOrDefault();
        }

        // State i destiny
        Assert.Equal(wantDestiny, destinies[i]);
    }
}
/// <summary>
/// Creates a key bound to <paramref name="builder"/> with one boolean slot per transition.
/// </summary>
/// <param name="builder">The builder this key belongs to.</param>
/// <param name="transitionsCount">Length of the lazy-transition state array.</param>
public DfaBuilderKey(DfaBuilder<TDfaState> builder, int transitionsCount)
{
    // All slots start out false (default for bool[]).
    _lazyTransitionsState = new bool[transitionsCount];
    _builder = builder;
}
/// <summary>
/// Builds two DFAs at once from a shared pattern set: one for the results {1, 3}
/// (multiples of 3) and one for {2, 3} (multiples of 7), where multiples of 21 map to 3.
/// Verifies matching for every number below the limit and the combined state count.
/// </summary>
public void TestSimultaneousLanguages()
{
    // Single source of truth for the range; the log message previously reported
    // 1000000 although only 100000 numbers are processed.
    const int numberCount = 100000;

    var builder = new DfaBuilder<int>();
    for (var i = 0; i < numberCount; ++i)
    {
        if (i % 21 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 3);
        }
        else if (i % 3 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 1);
        }
        else if (i % 7 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 2);
        }
    }

    // Language 0 accepts results 1 and 3; language 1 accepts results 2 and 3.
    var langs = new List<ISet<int>>
    {
        new HashSet<int> { 1, 3 },
        new HashSet<int> { 2, 3 },
    };

    // Only Build + CountStates are timed.
    var stopWatch = Stopwatch.StartNew();
    var starts = builder.Build(langs, null);
    var start3 = starts[0];
    var start7 = starts[1];
    var numstates = CountStates(start3, start7);
    stopWatch.Stop();

    var telapsed = stopWatch.ElapsedMilliseconds;
    helper.WriteLine($"Minimized {numberCount} numbers -> divisible by 7 and 3 (down to {numstates} states) in {telapsed * .001} seconds");

    for (var i = 0; i < numberCount; ++i)
    {
        if (i % 21 == 0)
        {
            // Multiples of 21 match in both languages with result 3.
            Assert.True(StringMatcher<int>.MatchWholeString(start3, i.ToString(), out var result));
            Assert.Equal(3, result);
            Assert.True(StringMatcher<int>.MatchWholeString(start7, i.ToString(), out result));
            Assert.Equal(3, result);
        }
        else if (i % 3 == 0)
        {
            Assert.True(StringMatcher<int>.MatchWholeString(start3, i.ToString(), out var result));
            Assert.Equal(1, result);
            Assert.False(StringMatcher<int>.MatchWholeString(start7, i.ToString(), out _));
        }
        else if (i % 7 == 0)
        {
            Assert.False(StringMatcher<int>.MatchWholeString(start3, i.ToString(), out _));
            Assert.True(StringMatcher<int>.MatchWholeString(start7, i.ToString(), out var result));
            Assert.Equal(2, result);
        }
    }

    Assert.Equal(137, numstates);
}