/// <summary>
/// Looks up the state reached from <paramref name="state"/> when
/// <paramref name="input"/> is consumed.
/// </summary>
/// <param name="state">State the transition must originate from.</param>
/// <param name="input">Character that must fall inside one of the transition's ranges.</param>
/// <returns>
/// The target of the single matching transition, or the default value when
/// no transition matches.
/// </returns>
/// <exception cref="InvalidOperationException">
/// More than one transition matches (raised by <c>SingleOrDefault</c>).
/// </exception>
protected override DFA.State GetNextState(DFA.State state, char input)
{
    var targets =
        from transition in dfa.Transitions
        where transition.From == state
              && transition.ValidInput.Ranges.Any(range => range.From <= input && range.To >= input)
        select transition.To;

    return targets.SingleOrDefault();
}
/// <summary>
/// Builds a DFA from this NFA. Every DFA state stands for a set of NFA
/// states; new sets are discovered by following each input range from an
/// already known set and taking the empty-closure of what is reachable.
/// Reference: http://web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart3.pdf
/// </summary>
/// <returns>The DFA whose start state corresponds to the closure of <c>Start</c>.</returns>
public DFA ToDfa()
{
    // DFA state -> the NFA states it represents
    Dictionary<DFA.State, HashSet<NFA.State>> nfaStatesByDfaState =
        new Dictionary<DFA.State, HashSet<NFA.State>>();

    DFA.State dfaStart = new DFA.State();
    DFA result = new DFA(dfaStart);
    nfaStatesByDfaState[dfaStart] = EmptyClosure(new[] { Start });

    // DFA states whose outgoing transitions have not been computed yet
    Stack<DFA.State> pending = new Stack<DFA.State>();
    pending.Push(dfaStart);

    while (pending.Count > 0)
    {
        DFA.State source = pending.Pop();
        HashSet<NFA.State> members = nfaStatesByDfaState[source];

        // todo: split unique ranges
        CharRange[] rawRanges = members.SelectMany(m => m.Alphabet).Distinct().ToArray();
        IEnumerable<CharRange> inputs = CharRange.Split(rawRanges).Distinct();

        foreach (CharRange input in inputs)
        {
            IEnumerable<State> targets = members
                .Where(m => m[input] != null)
                .SelectMany(m => m[input])
                .ToArray();
            HashSet<State> closure = EmptyClosure(targets);
            if (closure.Count <= 0)
            {
                continue;
            }

            // Reuse the DFA state that already represents this exact NFA state set, if any.
            DFA.State destination = null;
            foreach (KeyValuePair<DFA.State, HashSet<NFA.State>> known in nfaStatesByDfaState)
            {
                if (known.Value.SetEquals(closure))
                {
                    destination = known.Key;
                    break;
                }
            }

            if (destination == null)
            {
                destination = new DFA.State();
                pending.Push(destination);
                nfaStatesByDfaState[destination] = closure;
            }

            source.AddTransition(input, destination);
        }
    }

    // A DFA state is final as soon as one of the NFA states it represents is final;
    // its values are the sorted, de-duplicated values of those NFA states.
    foreach (KeyValuePair<DFA.State, HashSet<NFA.State>> entry in nfaStatesByDfaState)
    {
        DFA.State dfaState = entry.Key;
        HashSet<NFA.State> represented = entry.Value;

        if (represented.Any(n => n.IsFinal))
        {
            dfaState.IsFinal = true;
        }

        dfaState.Values = represented
            .Where(n => n.Values != null)
            .SelectMany(n => n.Values)
            .Distinct()
            .OrderBy(v => v)
            .ToArray();
        dfaState.Minimize();
    }

    return result;
}
/// <summary>
/// Builds the lexer transition table for <paramref name="dfa"/>: one row per
/// DFA state, one column per distinct input range, each cell holding the
/// target state number or -1. Also records, per accepting state, the action
/// of the first NFA (in list order) that owns one of its accepting NFA states.
/// </summary>
/// <param name="dfa">DFA whose states and transitions populate the table.</param>
/// <param name="nfas">
/// NFAs in priority order; indices at or beyond <c>tokens.Count</c> are treated as ignored tokens.
/// </param>
/// <param name="tokens">Token definitions; <c>Item2</c> is stored as the action function.</param>
public TransitionTable(DFA dfa, IList<NFA> nfas, IList<Tuple<string, Func<string, T>>> tokens)
{
    // Collect every distinct input range used by any NFA transition,
    // ordered by where the range starts.
    List<CharRange> ranges = nfas
        .SelectMany(nfa => nfa.Transitions)
        .SelectMany(transition => transition.ValidInput.Ranges)
        .Distinct()
        .OrderBy(range => range.From)
        .ToList();

    // The collected ranges need not reach either end of the char spectrum
    // (nothing may accept those characters), so pad both ends.
    char lowest = ranges.First().From;
    if (lowest != '\0')
    {
        ranges.Insert(0, new CharRange { From = '\0', To = (char)(lowest - 1) });
    }

    char highest = ranges.Last().To;
    if (highest != char.MaxValue)
    {
        ranges.Add(new CharRange { From = (char)(highest + 1), To = char.MaxValue });
    }

    // Rows: DFA states. Columns: distinct input ranges. -1 marks "no transition".
    short[,] uncompressed = new short[dfa.States.Count, ranges.Count];
    for (int row = 0; row < dfa.States.Count; ++row)
    {
        for (int column = 0; column < ranges.Count; ++column)
        {
            uncompressed[row, column] = -1;
        }
    }

    // Only the upper bound of each range is kept for later lookups.
    inputRangeEnds = ranges.Select(range => range.To).ToArray();
    actions = new Tuple<int, Func<string, T>>[dfa.States.Count];

    foreach (DFA.State state in dfa.States)
    {
        // Copy the loop variable so the lambdas below do not capture it directly.
        DFA.State source = state;

        foreach (Transition<DFA.State> transition in dfa.Transitions.Where(t => t.From == source))
        {
            foreach (CharRange range in transition.ValidInput.Ranges)
            {
                // The result is used as a column index without checking for "not found".
                int column = ranges.BinarySearch(range);
                uncompressed[source.StateNumber, column] = (short)transition.To.StateNumber;
            }
        }

        var acceptingNfaStates = source.NfaStates.Where(s => s.AcceptState);
        if (!acceptingNfaStates.Any())
        {
            continue;
        }

        // The FIRST NFA (lowest index) containing one of the accepting states wins.
        for (int rank = 0; rank < nfas.Count; ++rank)
        {
            if (!nfas[rank].States.Intersect(acceptingNfaStates).Any())
            {
                continue;
            }

            // Ignored tokens come AFTER the normal tokens, so an index beyond the token
            // list means "ignored": (int.MinValue, null) signals that parsing should
            // restart without reporting errors.
            actions[source.StateNumber] = rank >= tokens.Count
                ? new Tuple<int, Func<string, T>>(int.MinValue, null)
                : new Tuple<int, Func<string, T>>(rank, tokens[rank].Item2);
            break;
        }
    }

    table = new CompressedTable(uncompressed);

    // Precompute the table column for each of the first 256 character codes.
    asciiIndices = new int[256];
    for (int code = 0; code < asciiIndices.Length; ++code)
    {
        asciiIndices[code] = FindTableIndexFromRanges((char)code);
    }
}
/// <summary>
/// Reports whether the lexer has run out of transitions.
/// </summary>
/// <param name="nextState">State produced by the most recent transition lookup.</param>
/// <returns><c>true</c> when <paramref name="nextState"/> is null; otherwise <c>false</c>.</returns>
protected override bool ReachedTermination(DFA.State nextState)
{
    bool noSuccessor = nextState == null;
    return noSuccessor;
}
/// <summary>
/// Returns the action registered for <paramref name="state"/>.
/// </summary>
/// <param name="state">State to look up in the action map.</param>
/// <returns>
/// The (token number, action function) pair stored for the state, or
/// <c>null</c> when the state has no entry.
/// </returns>
protected override Tuple<int, Func<string, T>> GetAction(DFA.State state)
{
    // One TryGetValue instead of ContainsKey followed by the indexer (two lookups).
    Tuple<int, Func<string, T>> action;
    return actions.TryGetValue(state, out action) ? action : null;
}
/// <summary>
/// Tells the caller whether lexing of the current lexeme must stop.
/// </summary>
/// <param name="nextState">State obtained for the latest input character.</param>
/// <returns><c>true</c> if there is no next state (null); <c>false</c> otherwise.</returns>
protected override bool ReachedTermination(DFA.State nextState)
{
    return nextState == null;
}
/// <summary>
/// Lazily splits the reader's input into tokens by walking the compiled DFA
/// and emitting the longest match seen before each dead end.
/// </summary>
/// <param name="reader">Character source; it is read, rewound and released as tokens are produced.</param>
/// <returns>
/// The tokens in input order. Nothing is produced when the DFA has no start state.
/// Spans for which no final state was reached are emitted with null token type IDs.
/// </returns>
internal virtual IEnumerable<Token> Tokenize(CharReader reader)
{
    Compile();
    if (dfa.Start == null)
    {
        yield break;
    }

    PositionCounter start = new PositionCounter();
    PositionCounter end = null;
    int[] tokenTypeIDs = null;
    DFA.State current = dfa.Start;

    while (!reader.IsEnd)
    {
        char c = reader.Read();
        current = current[c];

        // Reached the end, nowhere else to go.
        if (current == null)
        {
            if (end == null)
            {
                // No final state was seen for this lexeme, so there is no position to
                // rewind to. Previously this dereferenced the null 'end'. Emit the
                // unmatched span up to the current reader position, the same way the
                // end-of-input branch below does, and carry on from here.
                // NOTE(review): assumes CreateToken accepts null token type IDs, as the
                // end-of-input branch already passes them - confirm.
                yield return CreateToken(reader, start, reader.PositionCounter, tokenTypeIDs);
            }
            else
            {
                // Return the match up to the previous final state and go back to
                // the character right after it.
                yield return CreateToken(reader, start, end, tokenTypeIDs);
                reader.MoveBack(end.Position);
            }

            reader.Release();
            start = reader.PositionCounter;
            end = null;
            tokenTypeIDs = null;
            current = dfa.Start;
            continue;
        }

        // Remember this position in case we need to come back.
        if (current.IsFinal)
        {
            end = reader.PositionCounter;
            tokenTypeIDs = current.Values;
        }
    }

    if (end != null)
    {
        yield return CreateToken(reader, start, end, tokenTypeIDs);

        // Input ended past the last final state: rewind and tokenize the remainder.
        if (end.Position != reader.Position)
        {
            // NOTE(review): this rewinds to end.Position - 1 while the loop above
            // rewinds to end.Position - confirm the difference is intentional.
            reader.MoveBack(end.Position - 1);
            foreach (Token token in Tokenize(reader))
            {
                yield return token;
            }
        }
    }
    else
    {
        yield return CreateToken(reader, start, reader.PositionCounter, tokenTypeIDs);
    }
}