/// <summary>
/// Builds an AST that matches the specified literal character sequence.
/// </summary>
/// <param name="literal">The characters to match. Must not be null or empty.</param>
/// <returns>A single literal node when the input is one UTF-32 codepoint; otherwise a
/// concatenation node whose children are one literal node per codepoint.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="literal"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="literal"/> is empty.</exception>
public static Ast FromLiteral(IEnumerable<char> literal)
{
    if (null == literal)
    {
        throw new ArgumentNullException(nameof(literal));
    }
    var exprs = new List<Ast>();
    // Convert to UTF-32 first so a surrogate pair becomes a single Lit node.
    foreach (var ich in UnicodeUtility.ToUtf32(literal))
    {
        var ast = new Ast();
        ast.Kind = Lit;
        ast.Value = ich;
        exprs.Add(ast);
    }
    if (0 == exprs.Count)
    {
        throw new ArgumentException("The string cannot be empty", nameof(literal));
    }
    if (1 == exprs.Count)
    {
        // A single codepoint needs no enclosing concatenation.
        return exprs[0];
    }
    var result = new Ast();
    result.Kind = Cat;
    result.Exprs = exprs.ToArray();
    return result;
}
/// <summary>
/// Builds a jagged array mapping each rule id to the UTF-32 codepoints of that
/// rule's "blockEnd" attribute.
/// </summary>
/// <param name="rules">The lexer rules to scan. Rule ids are assumed to be non-negative.</param>
/// <returns>An array indexed by rule id; an entry is null when the rule has no
/// non-empty "blockEnd" attribute. Empty when <paramref name="rules"/> is empty.</returns>
static int[][] _BuildBlockEnds(IList<LexRule> rules)
{
    // Seed with -1 (not int.MinValue) so an empty rule list produces a
    // zero-length result instead of overflowing on "max + 1" below.
    int max = -1;
    for (int ic = rules.Count, i = 0; i < ic; ++i)
    {
        var rule = rules[i];
        if (rule.Id > max)
        {
            max = rule.Id;
        }
    }
    var result = new int[max + 1][];
    for (int ic = rules.Count, i = 0; i < ic; ++i)
    {
        var rule = rules[i];
        var be = rule.GetAttribute("blockEnd") as string;
        if (!string.IsNullOrEmpty(be))
        {
            result[rule.Id] = new List<int>(UnicodeUtility.ToUtf32(be)).ToArray();
        }
    }
    return result;
}
/// <summary>
/// Builds a single composite lexer automaton by parsing each rule's expression
/// and joining the resulting machines to a new root via epsilon transitions.
/// </summary>
/// <param name="rules">The lexer rules. An expression beginning with a double quote is
/// treated as a JSON string literal matched verbatim; anything else is treated as a
/// regex wrapped in one-character delimiters, which are stripped before parsing.</param>
/// <param name="ignoreCase">The default case sensitivity for rules that do not
/// specify their own "ignoreCase" attribute.</param>
/// <param name="inputFile">The source file name, passed through for error reporting.</param>
/// <returns>The root state of the combined automaton.</returns>
static FFA _BuildLexer(IList<LexRule> rules, bool ignoreCase, string inputFile)
{
    var result = new FFA();
    for (var i = 0; i < rules.Count; ++i)
    {
        var rule = rules[i];
        FFA fa;
        if (rule.Expression.StartsWith("\"", StringComparison.Ordinal))
        {
            // Quoted expression: a JSON string literal to be matched verbatim.
            var pc = LexContext.Create(rule.Expression);
            fa = FFA.Literal(UnicodeUtility.ToUtf32(pc.ParseJsonString()), rule.Id);
        }
        else
        {
            // Regex expression: strip the single-character delimiters before parsing.
            fa = FFA.Parse(rule.Expression.Substring(1, rule.Expression.Length - 2), rule.Id, rule.ExpressionLine, rule.ExpressionColumn, rule.ExpressionPosition, inputFile);
        }
        // Rule ids are expected to be non-negative by this point
        // (release-safe replacement for the old Debugger.Break() scaffolding).
        System.Diagnostics.Debug.Assert(rule.Id >= 0, "rule.Id must be non-negative");
        // A rule's own "ignoreCase" attribute overrides the global setting,
        // which serves only as the default when the attribute is absent.
        var ic = (bool)rule.GetAttribute("ignoreCase", ignoreCase);
        if (ic)
        {
            fa = FFA.CaseInsensitive(fa, rule.Id);
        }
        result.AddEpsilon(fa);
    }
    return result;
}
/// <summary>
/// Demo entry point: builds a small lexer over a few C# keywords, identifiers,
/// strings, and whitespace; converts the NFA to a DFA; renders both machines to
/// JPEG via GraphViz (best effort); then lexes a sample string three ways (NFA,
/// DFA state table, Tokenizer) and prints the results to the console.
/// </summary>
static void Main(string[] args)
{
    var kws = "abstract|as|ascending|async|await|base|bool|break|byte|case|catch|char|checked|class|const|continue|decimal|default|delegate|descending|do|double|dynamic|else|enum|equals|explicit|extern|event|false|finally|fixed| float |for|foreach| get | global |goto|if|implicit|int|interface|internal|is|lock|long|namespace|new|null|object|operator|out|override|params|partial|private|protected|public|readonly|ref|return|sbyte|sealed|set|short|sizeof|stackalloc|static|string|struct|switch|this|throw|true|try|typeof|uint|ulong|unchecked|unsafe|ushort|using|var|virtual|void|volatile|while|yield";
    // shorten this so our state graphs aren't so big:
    kws = "as|base|case";
    var lexa = new FA[] {
        FA.Parse(kws, 0),                              // keywords
        FA.Parse("[A-Z_a-z][0-9A-Z_a-z]*", 1),         // identifiers
        FA.Parse(@"""([^""]|\\[^n])*""", 2),           // string literals
        FA.Parse("[\r\n\t\v\f ]+", 3)                  // whitespace
    };
    // build our lexer
    var nfa = FA.ToLexer(lexa);
    nfa.TrimNeutrals();
    Console.WriteLine("NFA has " + nfa.FillClosure().Count + " states");
    // minimize
    var dfa = nfa.ToDfa();
    dfa.TrimDuplicates();
    Console.WriteLine("DFA has " + dfa.FillClosure().Count + " states");
    var baseFn = @"..\..\lex_";
    var fn = baseFn + "nfa.jpg";
    Console.WriteLine("Rendering...");
    Console.WriteLine(fn);
    try
    {
        nfa.RenderToFile(fn);
    }
    catch
    {
        // Rendering shells out to GraphViz; failure just means it isn't installed.
        // NOTE: the newline was previously a raw line break inside the literal,
        // which is invalid in a non-verbatim C# string - now escaped as \n.
        Console.WriteLine("Rendering aborted - GraphViz is not installed. \nVisit GraphViz.org to download.");
    }
    fn = baseFn + "dfa.jpg";
    Console.WriteLine(fn);
    try
    {
        dfa.RenderToFile(fn);
    }
    catch
    {
        // GraphViz's absence was already reported above; ignore the second failure.
    }
    var text = "\"\\\"foo\\tbar\\\"\"";
    text = "\"base foo \\\"bar\\\" foobar bar 123 baz -345 fubar 1foo *#( 0\"";
    Console.Write("Lex NFA " + text + ": ");
    var sb = new StringBuilder();
    bool more;
    // lex NFA
    Console.WriteLine(nfa.Lex(UnicodeUtility.ToUtf32(text).GetEnumerator(), sb, out more));
    // build a simple symbol table so our ids match our NFA
    var symids = new int[lexa.Length];
    for (var i = 0; i < symids.Length; i++)
    {
        symids[i] = i;
    }
    var dfaTable = dfa.ToDfaStateTable(symids);
    Console.Write("Lex DFA " + text + ": ");
    Console.WriteLine(FA.Lex(dfaTable, UnicodeUtility.ToUtf32(text).GetEnumerator(), sb, out more));
    var tokenizer = new Tokenizer(dfa, text);
    foreach (var token in tokenizer)
    {
        Console.WriteLine("{0}: {1}", token.SymbolId, token.Value);
    }
    return;
}