Exemplo n.º 1
0
        // Verifies that uniting a set with its negation yields the single range
        // spanning Codepoint.MinValue..Codepoint.MaxValue.
        public void UnionComplement1()
        {
            var onlyA          = new RangeSet<Codepoint>('a');
            var everythingButA = RangeOperations<Codepoint>.Negate(onlyA);

            var actual   = RangeOperations<Codepoint>.Union(onlyA, everythingButA);
            var expected = new[] { Range<Codepoint>.Create(Codepoint.MinValue, Codepoint.MaxValue) };

            Assert.Equal(expected, actual);
        }
Exemplo n.º 2
0
        // Verifies that uniting a set with (All minus that set) yields the single range
        // spanning Codepoint.MinValue..Codepoint.MaxValue.
        public void UnionComplement2()
        {
            var onlyA          = new RangeSet<Codepoint>('a');
            var everythingButA = RangeOperations<Codepoint>.Difference(RangeSet<Codepoint>.All, onlyA);

            var actual   = RangeOperations<Codepoint>.Union(everythingButA, onlyA);
            var expected = new[] { Range<Codepoint>.Create(Codepoint.MinValue, Codepoint.MaxValue) };

            Assert.Equal(expected, actual);
        }
Exemplo n.º 3
0
        /// <summary>Gets a Unicode range set by name.</summary>
        /// <param name="name">The name to look up; it must be a key of <c>categoriesByName</c>.</param>
        /// <returns>
        /// The union of the range sets returned by <c>FromUnicodeCategory</c> for every
        /// category registered under <paramref name="name"/>.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="name"/> is not a known name.</exception>
        /// <remarks>
        /// Results are cached in <c>charSetByName</c>; both the cache lookup and the cache
        /// insertion happen while holding a lock on that dictionary.
        /// </remarks>
        public static RangeSet<Codepoint> FromUnicodeName(string name)
        {
            lock (charSetByName)
            {
                // Fast path: the set for this name was already computed.
                if (charSetByName.TryGetValue(name, out var result))
                {
                    return result;
                }
                if (!categoriesByName.TryGetValue(name, out var categories))
                {
                    // nameof keeps the reported parameter name in sync with the signature.
                    throw new ArgumentException(string.Format("Unknown unicode name '{0}'", name), nameof(name));
                }
                result = RangeOperations<Codepoint>.Union(categories.Select(FromUnicodeCategory));
                charSetByName.Add(name, result);
                return result;
            }
        }
        /// <summary>
        /// Builds a list of letter ranges in which every range records the charsets that use it.
        /// </summary>
        /// <param name="charsets">
        /// The charsets to register; the range set of each entry is <c>Value.Key</c> and the
        /// entry's <c>Key</c> is what gets recorded as the user of the affected ranges.
        /// </param>
        /// <param name="validRanges">The ranges the list is seeded with (initially without users).</param>
        /// <returns>The list of ranges, split at every charset range boundary, with user information.</returns>
        private static UsedRangeList<TLetter> MakeRanges(IDictionary<Id<RxMatch<TLetter>>, KeyValuePair<RangeSet<TLetter>, ICollection<LetterId>>> charsets, RangeSet<TLetter> validRanges)
        {
            var usedRanges = new UsedRangeList<TLetter>();

            // Seed the list with the valid ranges; nobody uses them yet.
            foreach (var valid in validRanges)
            {
                usedRanges.Add(new UsedLetterRange<TLetter>(valid, null));
            }
            foreach (var charset in charsets)
            {
                var user = charset.Key;
                foreach (var span in charset.Value.Key)
                {
                    // NOTE(review): presumably BinarySearch returns the index of the range
                    // containing the given letter - confirm against RangeOperations.
                    var first = RangeOperations<TLetter>.BinarySearch(usedRanges, span.From);
                    var head  = usedRanges[first];
                    if (head.From.CompareTo(span.From) < 0)
                    {
                        // The existing range starts before the span: cut off the part in front
                        // of the span and let "first" point at the remainder.
                        usedRanges.Insert(first, new UsedLetterRange<TLetter>(head.From, Incrementor<TLetter>.Decrement(span.From), head.Users));
                        first++;
                        usedRanges[first] = new UsedLetterRange<TLetter>(span.From, head.To, head.Users);
                    }
                    // The search for the end must happen after the front split, since the
                    // split shifts indices.
                    var last = RangeOperations<TLetter>.BinarySearch(usedRanges, span.To);
                    var tail = usedRanges[last];
                    if (tail.To.CompareTo(span.To) > 0)
                    {
                        // The existing range ends after the span: cut off the part behind it.
                        usedRanges[last] = new UsedLetterRange<TLetter>(tail.From, span.To, tail.Users);
                        usedRanges.Insert(last + 1, new UsedLetterRange<TLetter>(Incrementor<TLetter>.Increment(span.To), tail.To, tail.Users));
                    }
                    // Every range from first to last (inclusive) is used by this charset.
                    var index = first;
                    while (index <= last)
                    {
                        usedRanges[index] = usedRanges[index].AddUser(user);
                        index++;
                    }
                }
            }
            return usedRanges;
        }
Exemplo n.º 5
0
        // Verifies EnumerateRanges on two different sets, in both argument orders: the
        // ranges are the same, only the Left/Right markers are swapped.
        public void RangeDiffEnum()
        {
            RangeSet<Codepoint> first;
            RangeSet<Codepoint> second;
            PrepareCodepointSets(out first, out second);

            var expectedFirstThenSecond = new[] {
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('a', 'b'), ContainedIn.Left),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('d', 'd'), ContainedIn.Left),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('e', 'e'), ContainedIn.Both),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('f', 'f'), ContainedIn.Right),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('g', 'h'), ContainedIn.Both),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('i', 'z'), ContainedIn.Right)
            };
            var actualFirstThenSecond = RangeOperations<Codepoint>.EnumerateRanges(first, second);
            Assert.Equal(expectedFirstThenSecond, actualFirstThenSecond);

            var expectedSecondThenFirst = new[] {
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('a', 'b'), ContainedIn.Right),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('d', 'd'), ContainedIn.Right),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('e', 'e'), ContainedIn.Both),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('f', 'f'), ContainedIn.Left),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('g', 'h'), ContainedIn.Both),
                new KeyValuePair<Range<Codepoint>, ContainedIn>(Range<Codepoint>.Create('i', 'z'), ContainedIn.Left)
            };
            var actualSecondThenFirst = RangeOperations<Codepoint>.EnumerateRanges(second, first);
            Assert.Equal(expectedSecondThenFirst, actualSecondThenFirst);
        }
Exemplo n.º 6
0
        // Verifies that enumerating a set against itself reports each of its ranges,
        // unchanged, as ContainedIn.Both.
        public void RangeSelfEnum()
        {
            RangeSet<Codepoint> first;
            RangeSet<Codepoint> second;
            PrepareCodepointSets(out first, out second);

            var expectedFirst = first.Select(range => new KeyValuePair<Range<Codepoint>, ContainedIn>(range, ContainedIn.Both));
            var actualFirst   = RangeOperations<Codepoint>.EnumerateRanges(first, first);
            Assert.Equal(expectedFirst, actualFirst);

            var expectedSecond = second.Select(range => new KeyValuePair<Range<Codepoint>, ContainedIn>(range, ContainedIn.Both));
            var actualSecond   = RangeOperations<Codepoint>.EnumerateRanges(second, second);
            Assert.Equal(expectedSecond, actualSecond);
        }
        /// <summary>Builds a DFA from the given NFA.</summary>
        /// <param name="nfa">The automaton to convert; its states, start state and match transitions are read.</param>
        /// <param name="eof">
        /// Optional end-of-input letter. When it has a value, every DFA state with exactly one
        /// accept symbol gets a transition on this letter to <c>Dfa&lt;TLetter&gt;.Accept</c>.
        /// </param>
        /// <param name="matchEmptyEof">
        /// When <c>true</c> and <paramref name="eof"/> has a value, the start state also accepts on
        /// <paramref name="eof"/> if its transition for that letter is <c>Dfa&lt;TLetter&gt;.Reject</c>.
        /// </param>
        /// <param name="firstId">Passed through to <c>GetStateBuilder</c> for every builder lookup.</param>
        /// <returns>A DFA made of the states reachable from the start state, plus the map from state id to accepted symbol.</returns>
        /// <exception cref="InvalidOperationException">
        /// The start state builder was not reported as new, or a state has several accept symbols
        /// of the same (highest) precedence.
        /// </exception>
        public static Dfa<TLetter> Build(INonFiniteAutomaton<TLetter> nfa, TLetter? eof = null, bool matchEmptyEof = false, Id<DfaState<TLetter>> firstId = default(Id<DfaState<TLetter>>))
        {
            var dfaStates = new Dictionary<string, DfaStateBuilder<TLetter>>(StringComparer.Ordinal);
            var result    = new List<DfaState<TLetter>>();
            // Step 1: compute the epsilon closure information for all NFA nodes
            var closures = nfa.States.ToDictionary(s => s.Id, state => state.EpsilonClosure().ToArray());
            // Step 2: simulate transitions
            var pending = new Queue<DfaStateBuilder<TLetter>>();
            DfaStateBuilder<TLetter> startDfaState;

            // GetStateBuilder is expected to report "true" for a builder it had to create.
            if (!GetStateBuilder(dfaStates, firstId, closures[nfa.StartState.Id], out startDfaState))
            {
                throw new InvalidOperationException("A new DFA state builder was expected");
            }
            DfaStateBuilder<TLetter> acceptDfaState;

            // The builder for the empty NFA state set is requested for its side effect on
            // dfaStates only; the local itself is not used afterwards.
            GetStateBuilder(dfaStates, firstId, new NfaState<TLetter>[0], out acceptDfaState);
            pending.Enqueue(startDfaState);
            do
            {
                var currentDfaState   = pending.Dequeue();
                var allNfaTransitions = new RangeDictionary<TLetter, IEnumerable<NfaState<TLetter>>>();
                // Merge the match transitions of every NFA state of this DFA state, one NFA
                // state at a time, so that each resulting range maps to all its target states.
                foreach (var right in currentDfaState.NfaStates.Select(s => s.MatchTransitions))
                {
                    var left = allNfaTransitions;
                    allNfaTransitions = new RangeDictionary<TLetter, IEnumerable<NfaState<TLetter>>>(RangeOperations<TLetter>.EnumerateRanges(left.Keys, right.Keys, (rng, leftIndex, rightIndex) => {
                        var rangeStates = leftIndex.HasValue ? left.Values[leftIndex.Value] : Enumerable.Empty<NfaState<TLetter>>();
                        if (rightIndex.HasValue)
                        {
                            rangeStates = rangeStates.Append(right.Values[rightIndex.Value]);
                        }
                        return new KeyValuePair<Range<TLetter>, IEnumerable<NfaState<TLetter>>>(rng, rangeStates);
                    }));
                }
                var groupedNfaTransitions = new RangeDictionary<TLetter, HashSet<NfaState<TLetter>>>(
                    allNfaTransitions.Select(p => new KeyValuePair<Range<TLetter>, HashSet<NfaState<TLetter>>>(p.Key, new HashSet<NfaState<TLetter>>(p.Value))),
                    SetEqualityComparer<NfaState<TLetter>>.Default);
                foreach (var matchTarget in groupedNfaTransitions)
                {
                    DfaStateBuilder<TLetter> targetDfaState;
                    // The target DFA state is the union of the epsilon closures of all NFA targets;
                    // only builders reported as new still need to be simulated.
                    if (GetStateBuilder(dfaStates, firstId, matchTarget.Value.SelectMany(t => closures[t.Id]), out targetDfaState))
                    {
                        pending.Enqueue(targetDfaState);
                    }
                    currentDfaState.SetTransition(matchTarget.Key, targetDfaState);
                }
            } while (pending.Count > 0);
            // Step 3: identify and remove identical (same transitions) states
            while (true)
            {
                // Each entry maps a representative builder to the other builders equal to it.
                // NOTE(review): the representative is excluded from the list, so "Count > 1" skips
                // groups with exactly one duplicate - confirm whether "Count > 0" was intended.
                var dupes = dfaStates
                            .Values
                            .GroupBy(s => s)
                            .Select(g => new KeyValuePair<DfaStateBuilder<TLetter>, ICollection<DfaStateBuilder<TLetter>>>(g.Key, g.Where(s => !ReferenceEquals(s, g.Key)).ToList()))
                            .Where(p => p.Value.Count > 1)
                            .ToDictionary();
                if (dupes.Count == 0)
                {
                    break;
                }
                foreach (var dupe in dupes)
                {
                    // The id set depends on the duplicate group only, so build it once per group
                    // instead of once per builder.
                    var dupeIds = new HashSet<int>(dupe.Value.Select(s => s.Id));
                    foreach (var builder in dfaStates.Values)
                    {
                        builder.ReplaceTransition(b => dupeIds.Contains(b.Id), dupe.Key);
                    }
                }
                foreach (var builder in dupes.SelectMany(d => d.Value))
                {
                    dfaStates.Remove(builder.Key);
                }
            }
            // Step 4: make the DFA states; the first is the start state
            var states       = new Dictionary<DfaStateBuilder<TLetter>, DfaState<TLetter>>();
            var symbolStates = new Dictionary<Id<DfaState<TLetter>>, SymbolId>();

            pending.Enqueue(startDfaState);
            do
            {
                var builder = pending.Dequeue();
                // A builder can be queued several times; only its first visit creates a state.
                if (!states.ContainsKey(builder))
                {
                    // State ids are assigned in creation order, starting at 0.
                    var state = ((Func<Id<DfaState<TLetter>>, DfaState<TLetter>>)(id => new DfaState<TLetter>(id)))(new Id<DfaState<TLetter>>(result.Count));
                    result.Add(state);
                    states.Add(builder, state);
                    foreach (var transition in builder.GetTransitions())
                    {
                        pending.Enqueue(transition.Value);
                    }
                    // Keep only the accept symbols of the highest precedence present in this state.
                    var acceptSymbolIds = builder
                                          .NfaStates
                                          .Where(s => s.AcceptId.HasValue)
                                          // ReSharper disable once PossibleInvalidOperationException
                                          .GroupBy(a => a.Precedence, a => a.AcceptId.Value)
                                          .OrderByDescending(g => g.Key)
                                          .Take(1)
                                          .SelectMany(g => g)
                                          .Distinct()
                                          .ToList();
                    switch (acceptSymbolIds.Count)
                    {
                    case 0:
                        break;

                    case 1:
                        symbolStates.Add(state.Id, acceptSymbolIds[0]);
                        if (eof.HasValue)
                        {
                            state.SetTransition(Range<TLetter>.Create(eof.Value), Dfa<TLetter>.Accept);
                        }
                        break;

                    default:
                        throw new InvalidOperationException("The state " + state.Id + " has multiple same-precedence accept states " + string.Join(",", acceptSymbolIds));
                    }
                }
            } while (pending.Count > 0);
            if (matchEmptyEof && eof.HasValue && states[startDfaState].GetTransition(eof.Value) == Dfa<TLetter>.Reject)
            {
                states[startDfaState].SetTransition(Range<TLetter>.Create(eof.Value), Dfa<TLetter>.Accept);
            }
            // Step 5: apply transitions
            foreach (var pair in states)
            {
                foreach (var transition in pair.Key.GetTransitions())
                {
                    var range  = transition.Key;
                    var target = transition.Value;
                    pair.Value.SetTransition(range, states[target].Id);
                }
            }
            return new Dfa<TLetter>(eof, result, symbolStates);
        }