Example 1
 protected override DFA.State GetNextState(DFA.State state, char input)
 {
     // Find the single transition out of "state" whose valid input ranges
     // contain "input" and return its target; null when nothing matches.
     return(dfa.Transitions
            .Where(f => f.From == state && f.ValidInput.Ranges.Any(r => r.From <= input && r.To >= input))
            .Select(f => f.To)
            .SingleOrDefault());
 }
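A rough sketch of how a lookup like this could drive matching, built on the dfa field used above and the Start/IsFinal members that appear in the other examples; the Matches helper itself is hypothetical and not part of the library.
 // Hypothetical caller: step through the DFA one character at a time and
 // accept only if the whole input ends in a final state.
 public bool Matches(string input)
 {
     DFA.State current = dfa.Start;

     foreach (char c in input)
     {
         // GetNextState (above) returns null when no transition accepts c
         current = GetNextState(current, c);

         if (current == null)
         {
             return false;
         }
     }

     return current.IsFinal;
 }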
Example 2
        // http://web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart3.pdf
        public DFA ToDfa()
        {
            // each DFA state corresponds to a set of NFA states
            Dictionary <DFA.State, HashSet <NFA.State> > map = new Dictionary <DFA.State, HashSet <NFA.State> >();

            // create a start state for DFA
            DFA.State start = new DFA.State();
            DFA       dfa   = new DFA(start);

            map[start] = EmptyClosure(new[] { Start });

            Stack <DFA.State> unmarked = new Stack <DFA.State>();

            unmarked.Push(start);

            while (unmarked.Count > 0)
            {
                DFA.State state = unmarked.Pop();

                // split the states' combined alphabet into disjoint, non-overlapping character ranges
                IEnumerable <CharRange> alphabet = CharRange.Split(map[state].SelectMany(s => s.Alphabet).Distinct().ToArray()).Distinct();

                foreach (CharRange c in alphabet)
                {
                    IEnumerable <State> reachableStates = map[state].Where(s => s[c] != null).SelectMany(s => s[c]).ToArray();
                    HashSet <State>     move            = EmptyClosure(reachableStates);

                    if (move.Count > 0)
                    {
                        DFA.State newState = map.Where(kp => kp.Value.SetEquals(move))
                                             .Select(kp => kp.Key)
                                             .FirstOrDefault();

                        if (newState == null)
                        {
                            newState = new DFA.State();
                            unmarked.Push(newState);
                            map[newState] = move;
                        }

                        state.AddTransition(c, newState);
                    }
                }
            }

            // a DFA state is final if any of the NFA states it represents is final
            foreach (DFA.State state in map.Keys)
            {
                if (map[state].Any(s => s.IsFinal))
                {
                    state.IsFinal = true;
                }

                state.Values = map[state].Where(s => s.Values != null).SelectMany(s => s.Values).Distinct().OrderBy(v => v).ToArray();

                state.Minimize();
            }

            return(dfa);
        }
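ToDfa relies on an EmptyClosure helper that is not part of this snippet. Below is a minimal sketch of a typical epsilon-closure, assuming NFA.State exposes its epsilon moves through a property; the EpsilonTransitions name is a placeholder, not the library's actual member.
        // Sketch of an epsilon-closure: start from the given states and keep
        // following epsilon edges until no new state can be added.
        // NFA.State.EpsilonTransitions is an assumed member name.
        private static HashSet <NFA.State> EmptyClosure(IEnumerable <NFA.State> states)
        {
            HashSet <NFA.State> closure = new HashSet <NFA.State>(states);
            Stack <NFA.State>   work    = new Stack <NFA.State>(closure);

            while (work.Count > 0)
            {
                NFA.State s = work.Pop();

                foreach (NFA.State next in s.EpsilonTransitions)
                {
                    // Add returns true only for states not seen before
                    if (closure.Add(next))
                    {
                        work.Push(next);
                    }
                }
            }

            return closure;
        }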
Example 3
        public TransitionTable(DFA dfa, IList <NFA> nfas, IList <Tuple <string, Func <string, T> > > tokens)
        {
            // Gather the distinct valid input ranges from every NFA and sort them
            // by their starting character; together with the padding added below
            // they will cover the entire spectrum from '\0' to char.MaxValue.
            List <CharRange> allValidRanges =
                nfas.Select(
                    f =>
                    f.Transitions.Aggregate(Enumerable.Empty <CharRange>(), (acc, a) => acc.Union(a.ValidInput.Ranges)))
                .Aggregate((acc, a) => acc.Union(a))
                .OrderBy(f => f.From)
                .ToList();

            // The list might not cover both ends of the character spectrum. This
            // happens when no transition accepts '\0' or char.MaxValue, so pad it.
            char start = allValidRanges.First().From;

            if (start != '\0')
            {
                // Add a range that goes from \0 to the character before start
                allValidRanges.Insert(0, new CharRange {
                    From = '\0', To = (char)(start - 1)
                });
            }

            char end = allValidRanges.Last().To;

            if (end != char.MaxValue)
            {
                allValidRanges.Add(new CharRange {
                    From = (char)(end + 1), To = char.MaxValue
                });
            }

            // Create a 2D table
            // First dimension is the number of states found in the DFA
            // Second dimension is number of distinct character ranges
            short[,] uncompressed = new short[dfa.States.Count, allValidRanges.Count];

            // Fill table with -1
            for (int i = 0; i < dfa.States.Count; ++i)
            {
                for (int j = 0; j < allValidRanges.Count; ++j)
                {
                    uncompressed[i, j] = -1;
                }
            }

            // Save the ends of the input ranges into an array
            inputRangeEnds = allValidRanges.Select(f => f.To).ToArray();
            actions        = new Tuple <int, Func <string, T> > [dfa.States.Count];

            foreach (DFA.State state in dfa.States)
            {
                // Store to avoid problems with modified closure
                DFA.State state1 = state;
                foreach (Transition <DFA.State> transition in dfa.Transitions.Where(f => f.From == state1))
                {
                    // Set the table entry
                    foreach (CharRange range in transition.ValidInput.Ranges)
                    {
                        int ix = allValidRanges.BinarySearch(range);
                        uncompressed[state.StateNumber, ix] = (short)transition.To.StateNumber;
                    }
                }

                // If this is an accepting state, set the action function to be
                // the FIRST defined action function if multiple ones match
                if (state.NfaStates.Any(f => f.AcceptState))
                {
                    // Find the lowest ranking NFA which has the accepting state in it
                    for (int tokenNumber = 0; tokenNumber < nfas.Count; ++tokenNumber)
                    {
                        NFA nfa = nfas[tokenNumber];

                        if (nfa.States.Intersect(state.NfaStates.Where(f => f.AcceptState)).Any())
                        {
                            // Match found.
                            // This might be a token we ignore: ignored tokens are added AFTER the
                            // normal tokens, so their tokenNumber >= tokens.Count. In that case set
                            // the action to (int.MinValue, null) to signal that scanning should
                            // restart without reporting an error.
                            if (tokenNumber >= tokens.Count)
                            {
                                actions[state.StateNumber] = new Tuple <int, Func <string, T> >(int.MinValue, null);
                            }
                            else
                            {
                                actions[state.StateNumber] = new Tuple <int, Func <string, T> >(
                                    tokenNumber, tokens[tokenNumber].Item2);
                            }
                            break;
                        }
                    }
                }
            }

            table        = new CompressedTable(uncompressed);
            asciiIndices = new int[256];
            for (int i = 0; i < asciiIndices.Length; ++i)
            {
                asciiIndices[i] = FindTableIndexFromRanges((char)i);
            }
        }
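The constructor above calls FindTableIndexFromRanges, which is not shown. A plausible sketch under the assumption that it only maps a character to the column of the range containing it: because the ranges cover the full spectrum and inputRangeEnds is sorted ascending, the answer is the index of the first range end that is greater than or equal to the character.
        // Hypothetical version of FindTableIndexFromRanges; the real one may differ.
        // inputRangeEnds holds the upper bound of every range, sorted ascending.
        private int FindTableIndexFromRanges(char c)
        {
            int index = Array.BinarySearch(inputRangeEnds, c);

            if (index < 0)
            {
                // No exact hit: BinarySearch returns the bitwise complement of the
                // index of the next larger element, i.e. the first range end above
                // c, which is exactly the range that contains c.
                index = ~index;
            }

            return index;
        }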
Example 4
 protected override bool ReachedTermination(DFA.State nextState)
 {
     return(nextState == null);
 }
Example 5
 protected override Tuple <int, Func <string, T> > GetAction(DFA.State state)
 {
     // Return the action registered for this state, or null when there is none.
     return(actions.ContainsKey(state) ? actions[state] : null);
 }
Example 6
 protected override bool ReachedTermination(DFA.State nextState) => nextState == null;
Example 7
        internal virtual IEnumerable <Token> Tokenize(CharReader reader)
        {
            Compile();

            if (dfa.Start == null)
            {
                yield break;
            }

            PositionCounter start = new PositionCounter();
            PositionCounter end   = null;

            int[] tokenTypeIDs = null;

            DFA.State current = dfa.Start;

            while (!reader.IsEnd)
            {
                char c = reader.Read();
                current = current[c];

                // dead end: no transition accepts this character.
                // Emit the token that ended at the previous final state and
                // rewind the reader to the character right after that match.
                if (current == null)
                {
                    yield return(CreateToken(reader, start, end, tokenTypeIDs));

                    reader.MoveBack(end.Position);
                    reader.Release();

                    start        = reader.PositionCounter;
                    end          = null;
                    tokenTypeIDs = null;

                    current = dfa.Start;
                    continue;
                }

                // remember this position in case we need to come back
                if (current.IsFinal)
                {
                    end          = reader.PositionCounter;
                    tokenTypeIDs = current.Values;
                }
            }

            if (end != null)
            {
                yield return(CreateToken(reader, start, end, tokenTypeIDs));

                if (end.Position != reader.Position)
                {
                    reader.MoveBack(end.Position - 1);
                    foreach (Token token in Tokenize(reader))
                    {
                        yield return(token);
                    }
                }
            }
            else
            {
                yield return(CreateToken(reader, start, reader.PositionCounter, tokenTypeIDs));
            }
        }
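A hypothetical usage sketch for Tokenize (assuming the usual System and System.IO usings). The CharReader(TextReader) constructor assumed below does not appear in any of the examples, so treat it as a placeholder for however a CharReader is actually created.
        // Hypothetical caller: run the lexer over an in-memory string and print
        // every token it produces. The CharReader constructor is an assumption.
        internal void DumpTokens(string source)
        {
            using (StringReader text = new StringReader(source))
            {
                CharReader reader = new CharReader(text);

                foreach (Token token in Tokenize(reader))
                {
                    Console.WriteLine(token);
                }
            }
        }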