/// <summary>
/// Parses a regular expression from the lex context and builds a state machine for it
/// </summary>
/// <remarks>
/// Parsing stops at the end of the input or at a ')' that this call did not open. The ')'
/// is left in the input so the caller that consumed the matching '(' can verify it.
/// </remarks>
/// <param name="pc">The lex context that supplies the expression text</param>
/// <param name="accept">The accept symbol handed to every machine that gets built</param>
/// <returns>The machine for everything parsed by this call, or null if nothing was parsed</returns>
/// <exception cref="ExpectingException">A \p{..} or \P{..} escape named an unrecognized Unicode category, or a high surrogate was the last character of the input</exception>
internal static FA Parse(LexContext pc, int accept = -1)
{
    FA result = null, next = null;
    int ich;
    pc.EnsureStarted();
    while (true)
    {
        switch (pc.Current)
        {
            case -1:
            case ')':
                // end of input, or the end of a group. In the latter case the ')' is
                // deliberately not consumed here - the '(' branch below checks for it.
#if MINIMIZE
                // an empty expression (or empty group) leaves result null
                if (null != result)
                {
                    result = result.ToDfa();
                    result.TrimDuplicates();
                }
#endif
                return result;
            case '.':
                // any code point. The modifier is applied to the dot alone *before* it is
                // appended, so that "a.*" repeats only the dot and not everything before it
                next = FA.Set(new int[] { 0, 0x10ffff }, accept);
                pc.Advance();
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
            case '\\':
                pc.Advance();
                pc.Expecting();
                var isNot = false;
                switch (pc.Current)
                {
                    case 'P':
                    case 'p':
                        // \p{Xx} matches the category, \P{Xx} matches everything outside it
                        isNot = ('P' == pc.Current);
                        pc.Advance();
                        pc.Expecting('{');
                        var uc = new StringBuilder();
                        // remember where the category name started for error reporting
                        int uli = pc.Line;
                        int uco = pc.Column;
                        long upo = pc.Position;
                        while (-1 != pc.Advance() && '}' != pc.Current)
                        {
                            uc.Append((char)pc.Current);
                        }
                        pc.Expecting('}');
                        pc.Advance();
                        // the indices follow the ordinals of System.Globalization.UnicodeCategory
                        int uci;
                        switch (uc.ToString())
                        {
                            case "Lu": uci = 0; break;
                            case "Ll": uci = 1; break;
                            case "Lt": uci = 2; break;
                            case "Lm": uci = 3; break;
                            case "Lo": uci = 4; break;
                            case "Mn": uci = 5; break;
                            case "Mc": uci = 6; break;
                            case "Me": uci = 7; break;
                            case "Nd": uci = 8; break;
                            case "Nl": uci = 9; break;
                            case "No": uci = 10; break;
                            case "Zs": uci = 11; break;
                            case "Zl": uci = 12; break;
                            case "Zp": uci = 13; break;
                            case "Cc": uci = 14; break;
                            case "Cf": uci = 15; break;
                            case "Cs": uci = 16; break;
                            case "Co": uci = 17; break;
                            case "Pc": uci = 18; break; // was 19, which collided with "Pd"
                            case "Pd": uci = 19; break;
                            case "Ps": uci = 20; break;
                            case "Pe": uci = 21; break;
                            case "Pi": uci = 22; break;
                            case "Pf": uci = 23; break;
                            case "Po": uci = 24; break;
                            case "Sm": uci = 25; break;
                            case "Sc": uci = 26; break;
                            case "Sk": uci = 27; break;
                            case "So": uci = 28; break;
                            case "Cn": uci = 29; break;
                            default:
                                // previously an unknown name was silently treated as "Lu"
                                throw new ExpectingException("Unrecognized Unicode category \"" + uc.ToString() + "\"", uli, uco, upo, pc.FileOrUrl, "unicode-category");
                        }
                        if (isNot)
                        {
                            next = FA.Set(CharacterClasses.NotUnicodeCategories[uci], accept);
                        }
                        else
                        {
                            next = FA.Set(CharacterClasses.UnicodeCategories[uci], accept);
                        }
                        break;
                    case 'd':
                        next = FA.Set(CharacterClasses.digit, accept);
                        pc.Advance();
                        break;
                    case 'D':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.digit), accept);
                        pc.Advance();
                        break;
                    case 's':
                        next = FA.Set(CharacterClasses.space, accept);
                        pc.Advance();
                        break;
                    case 'S':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.space), accept);
                        pc.Advance();
                        break;
                    case 'w':
                        next = FA.Set(CharacterClasses.word, accept);
                        pc.Advance();
                        break;
                    case 'W':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.word), accept);
                        pc.Advance();
                        break;
                    default:
                        // any other escape is resolved to a single code point
                        if (-1 != (ich = _ParseEscapePart(pc)))
                        {
                            next = FA.Literal(new int[] { ich }, accept);
                        }
                        else
                        {
                            pc.Expecting(); // throw an error
                            return null; // doesn't execute
                        }
                        break;
                }
                next = _ParseModifier(next, pc, accept);
                if (null != result)
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                else
                {
                    result = next;
                }
                break;
            case '(':
                // a group: parse the inner expression recursively, then require the ')'
                pc.Advance();
                pc.Expecting();
                next = Parse(pc, accept);
                pc.Expecting(')');
                pc.Advance();
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
            case '|':
                if (-1 != pc.Advance())
                {
                    // everything after the '|' (up to the end or a closing ')') is the alternative
                    next = Parse(pc, accept);
                    result = FA.Or(new FA[] { result, next }, accept);
                }
                else
                {
                    // a trailing '|' makes what came before it optional
                    result = FA.Optional(result, accept);
                }
                break;
            case '[':
                // the key of the parsed set indicates a negated set
                var seti = _ParseSet(pc);
                var set = seti.Value;
                if (seti.Key)
                {
                    set = RangeUtility.NotRanges(set);
                }
                next = FA.Set(set, accept);
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
            default:
                // a literal. A high surrogate is combined with the following
                // character to form a single UTF-32 code point.
                ich = pc.Current;
                if (char.IsHighSurrogate((char)ich))
                {
                    if (-1 == pc.Advance())
                    {
                        throw new ExpectingException("Expecting low surrogate in Unicode stream", pc.Line, pc.Column, pc.Position, pc.FileOrUrl, "low-surrogate");
                    }
                    ich = char.ConvertToUtf32((char)ich, (char)pc.Current);
                }
                next = FA.Literal(new int[] { ich }, accept);
                pc.Advance();
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
        }
    }
}
/// <summary>
/// Returns a DFA table that can be used to lex or match
/// </summary>
/// <param name="symbolTable">The symbol table to use, or null to just implicitly tag symbols with integer ids</param>
/// <param name="progress">The optional progress reporter, passed through to the DFA conversion and the duplicate trimming</param>
/// <returns>A DFA table that can be used to efficiently match or lex input</returns>
/// <exception cref="InvalidOperationException">No symbol table was given and an accepting state has a negative accept symbol</exception>
public DfaEntry[] ToDfaStateTable(IList<int> symbolTable = null, IProgress<FAProgress> progress = null)
{
    // only convert to a DFA if we haven't already
    // ToDfa() already checks but it always copies
    // the state information so this performs better
    FA dfa;
    if (!IsDfa)
    {
        dfa = ToDfa(progress);
        dfa.TrimDuplicates(progress);
    }
    else
    {
        dfa = this;
    }
    var closure = new List<FA>();
    dfa.FillClosure(closure);

    // map each state to its (first) index in the closure once, rather than
    // scanning the closure with IndexOf() for every single transition
    var stateIndices = new Dictionary<FA, int>(closure.Count);
    for (var si = 0; si < closure.Count; ++si)
    {
        if (!stateIndices.ContainsKey(closure[si]))
        {
            stateIndices.Add(closure[si], si);
        }
    }

    var symbolLookup = new Dictionary<int, int>();
    if (null == symbolTable)
    {
        // we don't have a symbol table, so build the symbol lookup
        // from the states: go through each state, looking for accept
        // symbols and add them to the lookup if we haven't already
        var nextId = 0;
        for (var j = 0; j < closure.Count; ++j)
        {
            var state = closure[j];
            if (state.IsAccepting && !symbolLookup.ContainsKey(state.AcceptSymbol))
            {
                if (0 > state.AcceptSymbol)
                {
                    // report the index of the offending state (not the state count)
                    throw new InvalidOperationException("An accept symbol was never specified for state q" + j.ToString());
                }
                symbolLookup.Add(state.AcceptSymbol, nextId);
                ++nextId;
            }
        }
    }
    else
    {
        // build the symbol lookup from the symbol table
        for (var k = 0; k < symbolTable.Count; ++k)
        {
            symbolLookup.Add(symbolTable[k], k);
        }
    }

    // build the root array
    var result = new DfaEntry[closure.Count];
    for (var i = 0; i < result.Length; i++)
    {
        var fa = closure[i];
#if DEBUG
        if (fa.IsAccepting)
        {
            System.Diagnostics.Debug.Assert(-1 < fa.AcceptSymbol, "Illegal accept symbol " + fa.AcceptSymbol.ToString() + " was found on state state q" + i.ToString());
        }
#endif
        // get all the transition ranges for each destination state
        var trgs = fa.FillInputTransitionRangesGroupedByState();
        // make a new transition entry array for our DFA state table
        var trns = new DfaTransitionEntry[trgs.Count];
        var t = 0;
        // for each transition range
        foreach (var trg in trgs)
        {
            // resolve the destination state to its index; -1 if it is
            // not in the closure, which is what IndexOf() reported before
            int destination;
            if (null == trg.Key || !stateIndices.TryGetValue(trg.Key, out destination))
            {
                destination = -1;
            }
            // add the transition entry using the packed ranges
            trns[t] = new DfaTransitionEntry(trg.Value, destination);
            ++t;
        }
        // now add the state entry for the state above
#if DEBUG
        if (fa.IsAccepting && !symbolLookup.ContainsKey(fa.AcceptSymbol))
        {
            // best effort diagnostic dump - a failure to render must not mask the assert
            try
            {
                dfa.RenderToFile(@"dfastatetable_crashdump_dfa.jpg");
            }
            catch
            {
            }
            System.Diagnostics.Debug.Assert(false, "The symbol table did not contain an entry for state q" + i.ToString());
        }
#endif
        result[i] = new DfaEntry(
            fa.IsAccepting ? symbolLookup[fa.AcceptSymbol] : -1,
            trns);
    }
    return result;
}