/// <summary>
/// Parses a regular expression from the specified lexer context and builds a state machine for it
/// </summary>
/// <remarks>
/// Parsing runs until the end of the input or until an unconsumed ')' is found. The ')' itself is
/// left for the caller, which is how the recursive call made for '(' finds the end of its group.
/// </remarks>
/// <param name="pc">The lexer context to read the expression from</param>
/// <param name="accept">The accept symbol handed to every machine that is constructed</param>
/// <returns>The machine for the expression, or null if no element was read before the end of the input or a ')'</returns>
internal static FA Parse(LexContext pc, int accept = -1)
{
    FA result = null, next = null;
    int ich;
    pc.EnsureStarted();
    while (true)
    {
        switch (pc.Current)
        {
            case -1:
            case ')':
                // end of input or end of the current group: hand back what has been built so far
#if MINIMIZE
                // an empty expression leaves result null, so there is nothing to minimize
                if (null != result)
                {
                    result = result.ToDfa();
                    result.TrimDuplicates();
                }
#endif
                return result;
            case '.':
                next = FA.Set(new int[] { 0, 0x10ffff }, accept);
                pc.Advance();
                // the modifier binds to the dot alone, not to everything parsed before it
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
            case '\\':
                pc.Advance();
                pc.Expecting();
                var isNot = false;
                switch (pc.Current)
                {
                    case 'P':
                        isNot = true;
                        goto case 'p';
                    case 'p':
                        pc.Advance();
                        pc.Expecting('{');
                        var uc = new StringBuilder();
                        // remember where the category name starts so an unknown name can be reported there
                        int uli = pc.Line;
                        int uco = pc.Column;
                        long upo = pc.Position;
                        while (-1 != pc.Advance() && '}' != pc.Current)
                        {
                            uc.Append((char)pc.Current);
                        }
                        pc.Expecting('}');
                        pc.Advance();
                        int uci = _GetUnicodeCategoryIndex(uc.ToString());
                        if (0 > uci)
                        {
                            throw new ExpectingException("Unrecognized Unicode category \"" + uc.ToString() + "\"", uli, uco, upo, pc.FileOrUrl, "unicode-category");
                        }
                        // NOTE(review): \P (isNot) selects UnicodeCategories while \p selects NotUnicodeCategories.
                        // Judging by the table names this looks inverted, but the tables are not visible from here,
                        // so the selection is left unchanged - confirm against CharacterClasses.
                        if (isNot)
                        {
                            next = FA.Set(CharacterClasses.UnicodeCategories[uci], accept);
                        }
                        else
                        {
                            next = FA.Set(CharacterClasses.NotUnicodeCategories[uci], accept);
                        }
                        break;
                    case 'd':
                        next = FA.Set(CharacterClasses.digit, accept);
                        pc.Advance();
                        break;
                    case 'D':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.digit), accept);
                        pc.Advance();
                        break;
                    case 's':
                        next = FA.Set(CharacterClasses.space, accept);
                        pc.Advance();
                        break;
                    case 'S':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.space), accept);
                        pc.Advance();
                        break;
                    case 'w':
                        next = FA.Set(CharacterClasses.word, accept);
                        pc.Advance();
                        break;
                    case 'W':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.word), accept);
                        pc.Advance();
                        break;
                    default:
                        if (-1 != (ich = _ParseEscapePart(pc)))
                        {
                            next = FA.Literal(new int[] { ich }, accept);
                        }
                        else
                        {
                            pc.Expecting(); // throw an error
                            return null; // doesn't execute
                        }
                        break;
                }
                next = _ParseModifier(next, pc, accept);
                if (null != result)
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                else
                {
                    result = next;
                }
                break;
            case '(':
                pc.Advance();
                pc.Expecting();
                // the nested call returns with the cursor still on the closing ')'
                next = Parse(pc, accept);
                pc.Expecting(')');
                pc.Advance();
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
            case '|':
                if (-1 != pc.Advance())
                {
                    next = Parse(pc, accept);
                    result = FA.Or(new FA[] { result, next }, accept);
                }
                else
                {
                    // a trailing '|' has no right hand side, so the left hand side becomes optional
                    result = FA.Optional(result, accept);
                }
                break;
            case '[':
                var seti = _ParseSet(pc);
                var set = seti.Value;
                // the key of the pair is set when the ranges have to be inverted
                if (seti.Key)
                {
                    set = RangeUtility.NotRanges(set);
                }
                next = FA.Set(set, accept);
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
            default:
                ich = pc.Current;
                // combine a surrogate pair into a single UTF-32 codepoint
                if (char.IsHighSurrogate((char)ich))
                {
                    if (-1 == pc.Advance())
                    {
                        throw new ExpectingException("Expecting low surrogate in Unicode stream", pc.Line, pc.Column, pc.Position, pc.FileOrUrl, "low-surrogate");
                    }
                    ich = char.ConvertToUtf32((char)ich, (char)pc.Current);
                }
                next = FA.Literal(new int[] { ich }, accept);
                pc.Advance();
                next = _ParseModifier(next, pc, accept);
                if (null == result)
                {
                    result = next;
                }
                else
                {
                    result = FA.Concat(new FA[] { result, next }, accept);
                }
                break;
        }
    }
}

/// <summary>
/// Maps a two letter Unicode general category abbreviation to the index used for the category tables
/// </summary>
/// <remarks>
/// The indices coincide with the numeric values of System.Globalization.UnicodeCategory.
/// </remarks>
/// <param name="name">The abbreviation, such as "Lu" or "Nd". The comparison is case sensitive</param>
/// <returns>The index of the category, or -1 if the abbreviation is not recognized</returns>
static int _GetUnicodeCategoryIndex(string name)
{
    switch (name)
    {
        case "Lu": return 0;
        case "Ll": return 1;
        case "Lt": return 2;
        case "Lm": return 3;
        case "Lo": return 4;
        case "Mn": return 5;
        case "Mc": return 6;
        case "Me": return 7;
        case "Nd": return 8;
        case "Nl": return 9;
        case "No": return 10;
        case "Zs": return 11;
        case "Zl": return 12;
        case "Zp": return 13;
        case "Cc": return 14;
        case "Cf": return 15;
        case "Cs": return 16;
        case "Co": return 17;
        case "Pc": return 18;
        case "Pd": return 19;
        case "Ps": return 20;
        case "Pe": return 21;
        case "Pi": return 22;
        case "Pf": return 23;
        case "Po": return 24;
        case "Sm": return 25;
        case "Sc": return 26;
        case "Sk": return 27;
        case "So": return 28;
        case "Cn": return 29;
        default: return -1;
    }
}
/// <summary>
/// Writes a Graphviz dot specification of the specified closure to the specified <see cref="TextWriter"/>
/// </summary>
/// <remarks>
/// States are named with the state prefix followed by their index in <paramref name="closure"/>.
/// An empty closure produces a graph that contains no states.
/// </remarks>
/// <param name="closure">The closure of all states</param>
/// <param name="writer">The writer</param>
/// <param name="options">A <see cref="DotGraphOptions"/> instance with any options, or null to use the defaults</param>
static void _WriteDotTo(IList<FA> closure, TextWriter writer, DotGraphOptions options = null)
{
    if (null == options)
    {
        options = new DotGraphOptions();
    }
    string spfx = null == options.StatePrefix ? "q" : options.StatePrefix;
    writer.WriteLine("digraph FA {");
    writer.WriteLine("rankdir=LR");
    writer.WriteLine("node [shape=circle]");
    if (0 == closure.Count)
    {
        // nothing to draw: close the graph instead of failing on closure[0] below
        writer.WriteLine("}");
        return;
    }
    var finals = new List<FA>();
    var neutrals = new List<FA>();
    var accepting = closure[0].FillAcceptingStates();
    // final states that do not accept get their own styling
    foreach (var ffa in closure)
    {
        if (ffa.IsFinal && !ffa.IsAccepting)
        {
            finals.Add(ffa);
        }
    }
    // first pass: classify each state and write its outgoing edges
    int i = 0;
    foreach (var ffa in closure)
    {
        if (!finals.Contains(ffa))
        {
            if (ffa.IsAccepting)
            {
                // FillAcceptingStates() may already have reported this state; avoid listing it twice
                if (!accepting.Contains(ffa))
                {
                    accepting.Add(ffa);
                }
            }
            else if (ffa.IsNeutral)
            {
                neutrals.Add(ffa);
            }
        }
        var rngGrps = ffa.FillInputTransitionRangesGroupedByState();
        foreach (var rngGrp in rngGrps)
        {
            var di = closure.IndexOf(rngGrp.Key);
            writer.Write(spfx);
            writer.Write(i);
            writer.Write("->");
            writer.Write(spfx);
            writer.Write(di.ToString());
            writer.Write(" [label=\"");
            var sb = new StringBuilder();
            var rngs = rngGrp.Value;
            var nrngs = RangeUtility.NotRanges(rngs);
            var isNot = false;
            // use the inverted ranges for the label when they are the shorter description
            if (nrngs.Length < rngs.Length || (nrngs.Length == rngs.Length && 0x10ffff == rngs[rngs.Length - 1]))
            {
                isNot = true;
                if (0 != nrngs.Length)
                {
                    sb.Append("^");
                }
                else
                {
                    // nothing is excluded, so the transition matches anything
                    sb.Append(".");
                }
                rngs = nrngs;
            }
            // ranges are stored as flat pairs, hence the step of two
            for (var r = 0; r < rngs.Length; r += 2)
            {
                _AppendRangeTo(sb, rngs, r);
            }
            // only a single non-whitespace character is written without brackets
            if (isNot || sb.Length != 1 || (char.IsWhiteSpace(sb.ToString(), 0)))
            {
                writer.Write('[');
                writer.Write(_EscapeLabel(sb.ToString()));
                writer.Write(']');
            }
            else
            {
                writer.Write(_EscapeLabel(sb.ToString()));
            }
            writer.WriteLine("\"]");
        }
        // do epsilons
        foreach (var fffa in ffa.EpsilonTransitions)
        {
            writer.Write(spfx);
            writer.Write(i);
            writer.Write("->");
            writer.Write(spfx);
            writer.Write(closure.IndexOf(fffa));
            writer.WriteLine(" [style=dashed,color=gray]");
        }
        ++i;
    }
    // second pass: write one node statement per state with an HTML-like label
    i = 0;
    foreach (var ffa in closure)
    {
        writer.Write(spfx);
        writer.Write(i);
        writer.Write(" [");
        writer.Write("label=<");
        writer.Write("<TABLE BORDER=\"0\"><TR><TD>");
        writer.Write(spfx);
        writer.Write("<SUB>");
        writer.Write(i);
        writer.Write("</SUB></TD></TR>");
        if (ffa.IsAccepting)
        {
            writer.Write("<TR><TD>");
            // the text sits inside an HTML-like label, so markup characters must be written as entities;
            // the ampersand goes first so the entities produced by the other replacements stay intact
            writer.Write(Convert.ToString(ffa.AcceptSymbol)
                .Replace("&", "&amp;")
                .Replace("<", "&lt;")
                .Replace(">", "&gt;")
                .Replace("\"", "&quot;"));
            writer.Write("</TD></TR>");
        }
        writer.Write("</TABLE>");
        writer.Write(">");
        // isfinal is only evaluated for states that are not in the accepting list
        bool isfinal = false;
        if (accepting.Contains(ffa) || (isfinal = finals.Contains(ffa)))
        {
            writer.Write(",shape=doublecircle");
        }
        if (isfinal || neutrals.Contains(ffa))
        {
            writer.Write(",color=gray");
        }
        writer.WriteLine("]");
        ++i;
    }
    // finally, apply the shared styling to each group of states as a comma separated node list
    string delim = "";
    if (0 < accepting.Count)
    {
        foreach (var ntfa in accepting)
        {
            writer.Write(delim);
            writer.Write(spfx);
            writer.Write(closure.IndexOf(ntfa));
            delim = ",";
        }
        writer.WriteLine(" [shape=doublecircle]");
    }
    delim = "";
    if (0 < neutrals.Count)
    {
        foreach (var ntfa in neutrals)
        {
            writer.Write(delim);
            writer.Write(spfx);
            writer.Write(closure.IndexOf(ntfa));
            delim = ",";
        }
        writer.WriteLine(" [color=gray]");
    }
    delim = "";
    if (0 < finals.Count)
    {
        foreach (var ntfa in finals)
        {
            writer.Write(delim);
            writer.Write(spfx);
            writer.Write(closure.IndexOf(ntfa));
            delim = ",";
        }
        writer.WriteLine(" [shape=doublecircle,color=gray]");
    }
    writer.WriteLine("}");
}