public void UnionComplement1()
{
    // A set united with its own negation must span every codepoint.
    var letterA = new RangeSet<Codepoint>('a');
    var notLetterA = RangeOperations<Codepoint>.Negate(letterA);
    var union = RangeOperations<Codepoint>.Union(letterA, notLetterA);

    var fullRange = Range<Codepoint>.Create(Codepoint.MinValue, Codepoint.MaxValue);
    Assert.Equal(new[] { fullRange }, union);
}
public void UnionComplement2()
{
    // "All minus {a}" united with {a} must span every codepoint again.
    var letterA = new RangeSet<Codepoint>('a');
    var everythingButA = RangeOperations<Codepoint>.Difference(RangeSet<Codepoint>.All, letterA);
    var union = RangeOperations<Codepoint>.Union(everythingButA, letterA);

    var fullRange = Range<Codepoint>.Create(Codepoint.MinValue, Codepoint.MaxValue);
    Assert.Equal(new[] { fullRange }, union);
}
/// <summary>
/// Looks up the codepoint set registered under a Unicode name.
/// </summary>
/// <remarks>
/// Results are memoized in <c>charSetByName</c>; the lookup and the insertion both happen
/// while holding a lock on that dictionary.
/// </remarks>
/// <param name="name">The name to resolve; it must be a key of <c>categoriesByName</c> (or already cached).</param>
/// <returns>The union of the codepoint sets of all categories mapped to <paramref name="name"/>.</returns>
/// <exception cref="ArgumentException">The name is neither cached nor present in <c>categoriesByName</c>.</exception>
public static RangeSet<Codepoint> FromUnicodeName(string name)
{
    lock (charSetByName)
    {
        // Fast path: the set for this name was computed before.
        if (charSetByName.TryGetValue(name, out var charSet))
        {
            return charSet;
        }

        if (!categoriesByName.TryGetValue(name, out var categories))
        {
            throw new ArgumentException(string.Format("Unknown unicode name '{0}'", name), "name");
        }

        // Combine the sets of all categories behind the name and remember the result.
        charSet = RangeOperations<Codepoint>.Union(categories.Select(FromUnicodeCategory));
        charSetByName.Add(name, charSet);
        return charSet;
    }
}
/// <summary>
/// Partitions the valid letter ranges so that every boundary of every charset range falls on a
/// partition boundary, and tags each resulting piece with the matches that use it.
/// </summary>
/// <param name="charsets">Per match id, a pair whose key is the range set of that match (the pair's value is not read here).</param>
/// <param name="validRanges">The ranges that seed the list; each starts out with <c>null</c> users.</param>
/// <returns>The list of split ranges with their users attached.</returns>
private static UsedRangeList<TLetter> MakeRanges(IDictionary<Id<RxMatch<TLetter>>, KeyValuePair<RangeSet<TLetter>, ICollection<LetterId>>> charsets, RangeSet<TLetter> validRanges)
{
    var ranges = new UsedRangeList<TLetter>();
    foreach (var validRange in validRanges)
    {
        ranges.Add(new UsedLetterRange<TLetter>(validRange, null));
    }

    foreach (var charset in charsets)
    {
        var user = charset.Key;
        foreach (var charRange in charset.Value.Key)
        {
            // NOTE(review): BinarySearch is assumed to yield the index of the entry containing the
            // letter; a miss is not handled here - confirm against its implementation.
            var first = RangeOperations<TLetter>.BinarySearch(ranges, charRange.From);
            var head = ranges[first];
            if (head.From.CompareTo(charRange.From) < 0)
            {
                // The entry starts before the charset range: cut it in two and continue with the upper piece.
                var lowerPiece = new UsedLetterRange<TLetter>(head.From, Incrementor<TLetter>.Decrement(charRange.From), head.Users);
                var upperPiece = new UsedLetterRange<TLetter>(charRange.From, head.To, head.Users);
                ranges.Insert(first, lowerPiece);
                first++;
                ranges[first] = upperPiece;
            }

            // Looked up only after the left split, because the insertion above shifts indices.
            var last = RangeOperations<TLetter>.BinarySearch(ranges, charRange.To);
            var tail = ranges[last];
            if (tail.To.CompareTo(charRange.To) > 0)
            {
                // The entry extends past the charset range: cut it in two and keep the lower piece at this index.
                var lowerPiece = new UsedLetterRange<TLetter>(tail.From, charRange.To, tail.Users);
                var upperPiece = new UsedLetterRange<TLetter>(Incrementor<TLetter>.Increment(charRange.To), tail.To, tail.Users);
                ranges[last] = lowerPiece;
                ranges.Insert(last + 1, upperPiece);
            }

            // Every entry from first to last (inclusive) is used by this match.
            for (var index = first; index <= last; index++)
            {
                ranges[index] = ranges[index].AddUser(user);
            }
        }
    }

    return ranges;
}
public void RangeDiffEnum()
{
    // Enumerating two different sets must report each segment with the side(s) containing it,
    // and swapping the arguments must swap Left and Right.
    RangeSet<Codepoint> first;
    RangeSet<Codepoint> second;
    PrepareCodepointSets(out first, out second);

    var expectedFirstSecond = new[]
    {
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('a', 'b'), ContainedIn.Left),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('d', 'd'), ContainedIn.Left),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('e', 'e'), ContainedIn.Both),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('f', 'f'), ContainedIn.Right),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('g', 'h'), ContainedIn.Both),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('i', 'z'), ContainedIn.Right)
    };
    Assert.Equal(expectedFirstSecond, RangeOperations<Codepoint>.EnumerateRanges(first, second));

    var expectedSecondFirst = new[]
    {
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('a', 'b'), ContainedIn.Right),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('d', 'd'), ContainedIn.Right),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('e', 'e'), ContainedIn.Both),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('f', 'f'), ContainedIn.Left),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('g', 'h'), ContainedIn.Both),
        new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('i', 'z'), ContainedIn.Left)
    };
    Assert.Equal(expectedSecondFirst, RangeOperations<Codepoint>.EnumerateRanges(second, first));
}
public void RangeSelfEnum()
{
    // Enumerating a set against itself must report each of its ranges as contained in both sides.
    RangeSet<Codepoint> first;
    RangeSet<Codepoint> second;
    PrepareCodepointSets(out first, out second);

    var expectedFirst = first.Select(range => new KeyValuePair<Range<Codepoint>, ContainedIn>(range, ContainedIn.Both));
    Assert.Equal(expectedFirst, RangeOperations<Codepoint>.EnumerateRanges(first, first));

    var expectedSecond = second.Select(range => new KeyValuePair<Range<Codepoint>, ContainedIn>(range, ContainedIn.Both));
    Assert.Equal(expectedSecond, RangeOperations<Codepoint>.EnumerateRanges(second, second));
}
/// <summary>
/// Converts a non-deterministic automaton into a deterministic one by simulating its match
/// transitions on sets of NFA states, merging equal DFA state builders and numbering the result.
/// </summary>
/// <param name="nfa">The automaton to convert; its <c>States</c> and <c>StartState</c> are read.</param>
/// <param name="eof">
/// Optional end-of-input letter. When supplied, every state with an accept symbol gets a
/// transition on this letter to <c>Dfa&lt;TLetter&gt;.Accept</c>.
/// </param>
/// <param name="matchEmptyEof">
/// When <c>true</c> (and <paramref name="eof"/> is supplied), a start state whose transition on
/// <paramref name="eof"/> is <c>Reject</c> is given an <c>Accept</c> transition on it instead.
/// </param>
/// <param name="firstId">Forwarded unchanged to <c>GetStateBuilder</c> for every builder lookup.</param>
/// <returns>A DFA holding the created states (start state first) and the accept symbol per accepting state.</returns>
/// <exception cref="InvalidOperationException">
/// The start state builder was not newly created, or a DFA state ends up with more than one
/// accept symbol at its highest precedence.
/// </exception>
public static Dfa<TLetter> Build(INonFiniteAutomaton<TLetter> nfa, TLetter? eof = null, bool matchEmptyEof = false, Id<DfaState<TLetter>> firstId = default(Id<DfaState<TLetter>>))
{
    var dfaStates = new Dictionary<string, DfaStateBuilder<TLetter>>(StringComparer.Ordinal);
    var result = new List<DfaState<TLetter>>();

    // Step 1: compute the epsilon closure information for all NFA nodes
    var closures = nfa.States.ToDictionary(s => s.Id, state => state.EpsilonClosure().ToArray());

    // Step 2: simulate transitions
    var pending = new Queue<DfaStateBuilder<TLetter>>();
    DfaStateBuilder<TLetter> startDfaState;
    if (!GetStateBuilder(dfaStates, firstId, closures[nfa.StartState.Id], out startDfaState))
    {
        throw new InvalidOperationException("A new DFA state builder was expected");
    }
    // The builder for the empty NFA state set is requested up front; the returned builder itself
    // is not used further in this method (presumably the call registers it in dfaStates - confirm).
    DfaStateBuilder<TLetter> acceptDfaState;
    GetStateBuilder(dfaStates, firstId, new NfaState<TLetter>[0], out acceptDfaState);
    pending.Enqueue(startDfaState);
    do
    {
        var currentDfaState = pending.Dequeue();

        // Overlay the match transitions of all NFA states in this DFA state, one NFA state at a
        // time, so that each resulting range maps to every NFA state reachable on it.
        var allNfaTransitions = new RangeDictionary<TLetter, IEnumerable<NfaState<TLetter>>>();
        foreach (var right in currentDfaState.NfaStates.Select(s => s.MatchTransitions))
        {
            var left = allNfaTransitions;
            allNfaTransitions = new RangeDictionary<TLetter, IEnumerable<NfaState<TLetter>>>(RangeOperations<TLetter>.EnumerateRanges(left.Keys, right.Keys, (rng, leftIndex, rightIndex) =>
            {
                var rangeStates = leftIndex.HasValue ? left.Values[leftIndex.Value] : Enumerable.Empty<NfaState<TLetter>>();
                if (rightIndex.HasValue)
                {
                    rangeStates = rangeStates.Append(right.Values[rightIndex.Value]);
                }
                return new KeyValuePair<Range<TLetter>, IEnumerable<NfaState<TLetter>>>(rng, rangeStates);
            }));
        }

        var groupedNfaTransitions = new RangeDictionary<TLetter, HashSet<NfaState<TLetter>>>(
            allNfaTransitions.Select(p => new KeyValuePair<Range<TLetter>, HashSet<NfaState<TLetter>>>(p.Key, new HashSet<NfaState<TLetter>>(p.Value))),
            SetEqualityComparer<NfaState<TLetter>>.Default);
        foreach (var matchTarget in groupedNfaTransitions)
        {
            DfaStateBuilder<TLetter> targetDfaState;
            // Only builders reported as new still need their own transitions simulated.
            if (GetStateBuilder(dfaStates, firstId, matchTarget.Value.SelectMany(t => closures[t.Id]), out targetDfaState))
            {
                pending.Enqueue(targetDfaState);
            }
            currentDfaState.SetTransition(matchTarget.Key, targetDfaState);
        }
    } while (pending.Count > 0);

    // Step 3: identify and remove identical (same transitions) states
    // Repeated until no duplicates remain, because redirecting transitions can make further builders equal.
    while (true)
    {
        var dupes = dfaStates
            .Values
            .GroupBy(s => s)
            .Select(g => new KeyValuePair<DfaStateBuilder<TLetter>, ICollection<DfaStateBuilder<TLetter>>>(g.Key, g.Where(s => !ReferenceEquals(s, g.Key)).ToList()))
            // The list excludes the representative (g.Key), so a single entry already means a duplicate exists.
            .Where(p => p.Value.Count > 0)
            .ToDictionary();
        if (dupes.Count == 0)
        {
            break;
        }
        foreach (var dupe in dupes)
        {
            // The id set depends only on the duplicate group, so build it once per group.
            var dupeIds = new HashSet<int>(dupe.Value.Select(s => s.Id));
            foreach (var builder in dfaStates.Values)
            {
                builder.ReplaceTransition(b => dupeIds.Contains(b.Id), dupe.Key);
            }
        }
        foreach (var builder in dupes.SelectMany(d => d.Value))
        {
            dfaStates.Remove(builder.Key);
        }
    }

    // Step 4: make the DFA states; the first is the start state
    var states = new Dictionary<DfaStateBuilder<TLetter>, DfaState<TLetter>>();
    var symbolStates = new Dictionary<Id<DfaState<TLetter>>, SymbolId>();
    pending.Enqueue(startDfaState);
    do
    {
        var builder = pending.Dequeue();
        if (!states.ContainsKey(builder))
        {
            // State ids are assigned in creation order, starting at the current result count.
            var state = new DfaState<TLetter>(new Id<DfaState<TLetter>>(result.Count));
            result.Add(state);
            states.Add(builder, state);
            foreach (var transition in builder.GetTransitions())
            {
                pending.Enqueue(transition.Value);
            }

            // Keep only the accept symbols of the highest precedence present in this state.
            var acceptSymbolIds = builder
                .NfaStates
                .Where(s => s.AcceptId.HasValue)
                // ReSharper disable once PossibleInvalidOperationException
                .GroupBy(a => a.Precedence, a => a.AcceptId.Value)
                .OrderByDescending(g => g.Key)
                .Take(1)
                .SelectMany(g => g)
                .Distinct()
                .ToList();
            switch (acceptSymbolIds.Count)
            {
                case 0:
                    break;
                case 1:
                    symbolStates.Add(state.Id, acceptSymbolIds[0]);
                    if (eof.HasValue)
                    {
                        state.SetTransition(Range<TLetter>.Create(eof.Value), Dfa<TLetter>.Accept);
                    }
                    break;
                default:
                    throw new InvalidOperationException("The state " + state.Id + " has multiple same-precedence accept states " + string.Join(",", acceptSymbolIds));
            }
        }
    } while (pending.Count > 0);

    if (matchEmptyEof && eof.HasValue && states[startDfaState].GetTransition(eof.Value) == Dfa<TLetter>.Reject)
    {
        states[startDfaState].SetTransition(Range<TLetter>.Create(eof.Value), Dfa<TLetter>.Accept);
    }

    // Step 5: apply transitions
    foreach (var pair in states)
    {
        foreach (var transition in pair.Key.GetTransitions())
        {
            var range = transition.Key;
            var target = transition.Value;
            pair.Value.SetTransition(range, states[target].Id);
        }
    }

    return new Dfa<TLetter>(eof, result, symbolStates);
}