/// <summary>
/// Builds the tokenizer class and the parser class for a grammar and writes both,
/// wrapped in a single namespace, to <paramref name="writer"/>.
/// </summary>
/// <param name="ebnf">The EBNF document handed to the tokenizer class builder.</param>
/// <param name="cfg">The grammar handed to both class builders.</param>
/// <param name="namespace">The name given to the generated namespace.</param>
/// <param name="preamble">Optional text forwarded to <c>_FillPreamble</c>; skipped when null or empty.</param>
/// <param name="name">Base name; the generated types are called <c>name + "Tokenizer"</c> and <c>name + "Parser"</c>.</param>
/// <param name="language">The CodeDOM language name; null or empty selects <c>"cs"</c>.</param>
/// <param name="writer">Receives the generated source text.</param>
public static void WriteParserAndGeneratorClassesTo(EbnfDocument ebnf, Cfg cfg, string @namespace, string preamble, string name, string language, TextWriter writer)
{
    // The provider is resolved before either class is built.
    var provider = CodeDomProvider.CreateProvider(string.IsNullOrEmpty(language) ? "cs" : language);
    var tokenizerType = _CreateTokenizerClass(ebnf, cfg, string.Concat(name, "Tokenizer"));
    var parserType = _CreateParserClass(cfg, string.Concat(name, "Parser"));

    var targetNamespace = new CodeNamespace(@namespace);
    var unit = new CodeCompileUnit();
    unit.Namespaces.Add(targetNamespace);

    // The preamble is applied to the namespace before any type is added to it.
    if (!string.IsNullOrEmpty(preamble))
    {
        _FillPreamble(preamble, targetNamespace);
    }

    targetNamespace.Types.Add(tokenizerType);
    targetNamespace.Types.Add(parserType);

    var options = new CodeGeneratorOptions { BlankLinesBetweenMembers = false };
    provider.GenerateCodeFromCompileUnit(unit, writer, options);
}
/// <summary>
/// Builds the state machine for this two-operand expression by concatenating the
/// machines of <c>Left</c> and <c>Right</c>.
/// </summary>
/// <param name="parent">The owning document, used to look up the accepting symbol; may be null, in which case the symbol is the empty string.</param>
/// <param name="cfg">The grammar, forwarded unchanged to the operands.</param>
/// <returns>
/// Null when both operands are missing; the single operand's machine (re-tagged with the
/// accepting symbol) when only one is present; otherwise the concatenation of both.
/// </returns>
public override CharFA ToFA(EbnfDocument parent, Cfg cfg)
{
    // The symbol lookup happens up front, even if the method ends up returning null.
    var symbol = "";
    if (null != parent)
    {
        symbol = parent.GetContainingIdForExpression(this);
    }

    if (null == Right && null == Left)
    {
        return null;
    }

    if (null == Right || null == Left)
    {
        // Exactly one operand exists: use its machine and stamp the accepting symbol on it.
        var only = (null == Right) ? Left : Right;
        var machine = only.ToFA(parent, cfg);
        machine.FirstAcceptingState.AcceptingSymbol = symbol;
        return machine;
    }

    var parts = new CharFA[] { Left.ToFA(parent, cfg), Right.ToFA(parent, cfg) };
    return CharFA.Concat(parts, symbol);
}
/// <summary>
/// Parses the grammar file at the fixed relative path <c>..\..\..\ebnf.ebnf</c> and
/// prints the resulting document to the console.
/// </summary>
/// <remarks>The message list returned by <c>_TryParse</c> is discarded.</remarks>
static void _DoParse()
{
    var source = new FileReaderEnumerable(@"..\..\..\ebnf.ebnf");
    var parser = new EbnfParser(new EbnfTokenizer(source));

    EbnfDocument document;
    EbnfDocument._TryParse(parser, out document);

    Console.WriteLine(document);
}
/// <summary>
/// Builds the state machine for this expression by wrapping the inner expression's
/// machine with <c>CharFA.Optional</c>.
/// </summary>
/// <param name="parent">The owning document, used to look up the accepting symbol; may be null, in which case the symbol is the empty string.</param>
/// <param name="cfg">The grammar, forwarded unchanged to the inner expression.</param>
/// <returns>Null when there is no inner expression; otherwise the optional machine.</returns>
public override CharFA ToFA(EbnfDocument parent, Cfg cfg)
{
    if (null == Expression)
    {
        return null;
    }

    // The inner machine is built before the symbol is looked up.
    var inner = Expression.ToFA(parent, cfg);

    string symbol;
    if (null == parent)
    {
        symbol = "";
    }
    else
    {
        symbol = parent.GetContainingIdForExpression(this);
    }

    return CharFA.Optional(inner, symbol);
}
/// <summary>
/// Returns a single disjunction that contains only the referenced <c>Symbol</c>.
/// </summary>
/// <param name="parent">Not used by this implementation.</param>
/// <param name="cfg">Not used by this implementation.</param>
/// <returns>A list holding one list, which holds <c>Symbol</c>.</returns>
/// <exception cref="InvalidOperationException"><c>Symbol</c> is null or empty.</exception>
public override IList<IList<string>> ToDisjunctions(EbnfDocument parent, Cfg cfg)
{
    if (string.IsNullOrEmpty(Symbol))
    {
        throw new InvalidOperationException("The ref expression was nil.");
    }

    var disjunctions = new List<IList<string>>();
    disjunctions.Add(new List<string> { Symbol });
    return disjunctions;
}
/// <summary>
/// Builds the tokenizer class for a grammar and writes just that type to <paramref name="writer"/>.
/// </summary>
/// <param name="ebnf">The EBNF document handed to the tokenizer class builder.</param>
/// <param name="cfg">The grammar handed to the tokenizer class builder.</param>
/// <param name="name">The name of the generated class.</param>
/// <param name="language">The CodeDOM language name; null or empty selects <c>"cs"</c>.</param>
/// <param name="writer">Receives the generated source text.</param>
public static void WriteTokenizerClassTo(EbnfDocument ebnf, Cfg cfg, string name, string language, TextWriter writer)
{
    // The provider is resolved before the class is built.
    var provider = CodeDomProvider.CreateProvider(string.IsNullOrEmpty(language) ? "cs" : language);
    var tokenizerType = _CreateTokenizerClass(ebnf, cfg, name);

    var options = new CodeGeneratorOptions { BlankLinesBetweenMembers = false };
    provider.GenerateCodeFromType(tokenizerType, writer, options);
}
/// <summary>
/// Builds the state machine for this expression by wrapping the inner expression's
/// machine with <c>CharFA.Repeat</c>, and additionally with <c>CharFA.Optional</c>
/// when <c>IsOptional</c> is set.
/// </summary>
/// <param name="parent">The owning document, used to look up the accepting symbol; may be null, in which case the symbol is the empty string.</param>
/// <param name="cfg">The grammar, forwarded unchanged to the inner expression.</param>
/// <returns>Null when there is no inner expression; otherwise the repeating machine.</returns>
public override CharFA ToFA(EbnfDocument parent, Cfg cfg)
{
    if (null == Expression)
    {
        return null;
    }

    // The inner machine is built before the symbol is looked up.
    var inner = Expression.ToFA(parent, cfg);
    var symbol = (null == parent) ? "" : parent.GetContainingIdForExpression(this);
    var repeated = CharFA.Repeat(inner, symbol);

    // Note: the optional wrapper is applied without passing the accepting symbol.
    return IsOptional ? CharFA.Optional(repeated) : repeated;
}
/// <summary>
/// Builds the state machine for a symbol reference by delegating to the expression of
/// the production that <c>Symbol</c> names in <paramref name="parent"/>.
/// </summary>
/// <param name="parent">The owning document; required, because the referenced production is looked up in it.</param>
/// <param name="cfg">The grammar, forwarded unchanged to the referenced expression.</param>
/// <returns>The referenced expression's machine, or null when that production has no expression.</returns>
/// <exception cref="InvalidOperationException"><paramref name="parent"/> is null.</exception>
public override CharFA ToFA(EbnfDocument parent, Cfg cfg)
{
    if (null == parent)
    {
        throw new InvalidOperationException("The FA cannot be retrieved from a symbol reference without the parent document.");
    }

    // Indexer lookup: what happens for an undeclared symbol depends on the Productions
    // collection type, which is not visible here.
    var target = parent.Productions[Symbol].Expression;
    return (null == target) ? null : target.ToFA(parent, cfg);
}
/// <summary>
/// Returns the inner expression's disjunctions plus one empty disjunction, so that the
/// inner expression may be skipped.
/// </summary>
/// <param name="parent">Forwarded unchanged to the inner expression.</param>
/// <param name="cfg">Forwarded unchanged to the inner expression.</param>
/// <returns>
/// An empty list when there is no inner expression; otherwise the inner disjunctions
/// followed by an empty one, unless an empty one is already present.
/// </returns>
public override IList<IList<string>> ToDisjunctions(EbnfDocument parent, Cfg cfg)
{
    var result = new List<IList<string>>();
    if (null == Expression)
    {
        return result;
    }

    result.AddRange(Expression.ToDisjunctions(parent, cfg));

    // Add the empty alternative only once.
    var empty = new List<string>();
    var alreadyPresent = result.Contains(empty, OrderedCollectionEqualityComparer<string>.Default);
    if (!alreadyPresent)
    {
        result.Add(empty);
    }

    return result;
}
/// <summary>
/// Finds the first production in <paramref name="parent"/> whose expression equals this
/// one and returns a single disjunction containing that production's key.
/// </summary>
/// <param name="parent">The document whose productions are searched; dereferenced without a null check.</param>
/// <param name="cfg">Not used by this implementation.</param>
/// <returns>A list holding one list, which holds the matching production's key.</returns>
/// <exception cref="InvalidOperationException">No production's expression equals this expression.</exception>
public override IList<IList<string>> ToDisjunctions(EbnfDocument parent, Cfg cfg)
{
    foreach (var production in parent.Productions)
    {
        if (!Equals(production.Value.Expression, this))
        {
            continue;
        }

        return new List<IList<string>> { new List<string> { production.Key } };
    }

    throw new InvalidOperationException("The terminal was not declared.");
}
/// <summary>
/// Drives <paramref name="parser"/> to the end of the document, collects syntax errors,
/// and, when there are none, builds an <c>EbnfDocument</c> from the grammar node's productions.
/// </summary>
/// <param name="parser">The parser to read subtrees from until it reports <c>EndDocument</c>.</param>
/// <param name="result">The parsed document, or null when any error was recorded.</param>
/// <returns>The messages gathered while parsing, including those reported for each production.</returns>
public static IList<EbnfMessage> _TryParse(EbnfParser parser, out EbnfDocument result)
{
    result = null;
    var messages = new List<EbnfMessage>();
    ParseNode grammarNode = null;
    var failed = false;

    do
    {
        var subtree = parser.ParseSubtree();
        if (null == subtree)
        {
            continue;
        }

        // Every error node anywhere in the subtree becomes one syntax-error message.
        foreach (var node in subtree.FillDescendantsAndSelf())
        {
            if (EbnfParser._ERROR != node.SymbolId)
            {
                continue;
            }

            failed = true;
            messages.Add(new EbnfMessage(EbnfErrorLevel.Error, -1, string.Concat("Syntax error in EBNF document. Unrecognized: ", node.Value), node.Line, node.Column, node.Position));
        }

        // If several grammar subtrees are returned, the last one wins.
        if (EbnfParser.grammar == subtree.SymbolId)
        {
            grammarNode = subtree;
        }
    }
    while (ParserNodeType.EndDocument != parser.NodeType);

    if (null == grammarNode)
    {
        failed = true;
        messages.Add(new EbnfMessage(EbnfErrorLevel.Error, -1, "No productions were found.", 1, 1, 0));
    }

    if (failed)
    {
        return messages;
    }

    result = new EbnfDocument();
    foreach (var child in grammarNode.Children)
    {
        if (EbnfParser.production != child.SymbolId)
        {
            continue;
        }

        KeyValuePair<string, EbnfProduction> production;
        messages.AddRange(_TryParseProduction(child, out production));
        result.Productions.Add(production);
    }

    return messages;
}
/// <summary>
/// Reads an EBNF grammar from the file named by the single argument, or from standard
/// input when no argument is given, prepares it for LL(1), and prints each first-first
/// and first-follows conflict together with the value reported by <c>cfg.GetK</c>.
/// </summary>
/// <param name="args">Zero or one argument: the grammar file path.</param>
/// <returns>1 when more than one argument is given (after printing usage); otherwise 0.</returns>
static int Main(string[] args)
{
    if (1 < args.Length)
    {
        _PrintUsage();
        return 1;
    }

    var ebnf = (1 == args.Length)
        ? EbnfDocument.ReadFrom(args[0])
        : EbnfDocument.ReadFrom(Console.In);

    ebnf.Validate(true);
    ebnf.Prepare(true);
    var cfg = ebnf.ToCfg();

    // NOTE(review): the preparation step is run four times in a row; presumably this
    // is deliberate (repeated passes) rather than a copy/paste slip. Confirm.
    for (var pass = 0; pass < 4; ++pass)
    {
        cfg.PrepareLL1(false);
    }

    foreach (var conflict in cfg.FillConflicts())
    {
        var kind = conflict.Kind;

        string heading;
        if (CfgConflictKind.FirstFirst == kind)
        {
            heading = "First first conflict on {0} between rules:";
        }
        else if (CfgConflictKind.FirstFollows == kind)
        {
            heading = "First follows conflict on {0} between rules:";
        }
        else
        {
            // Any other conflict kind is not reported.
            continue;
        }

        Console.WriteLine(heading, conflict.Symbol);
        Console.WriteLine("\t{0}", conflict.Rule1);
        Console.Write("\t{0} k = ", conflict.Rule2);
        Console.WriteLine(cfg.GetK(conflict.Rule1, conflict.Rule2, 5));
    }

    return 0;
}
/// <summary>
/// Loads the grammar named by the first argument and runs every remaining argument
/// (an input file) through both the debug parser and the table-driven parser,
/// printing a pass/fail line for each.
/// </summary>
/// <param name="args">The grammar file followed by one or more input files.</param>
/// <returns>1 on bad usage or when any test failed; otherwise 0.</returns>
static int Main(string[] args)
{
    if (args.Length < 2)
    {
        _PrintUsage();
        return 1;
    }

    var ebnf = EbnfDocument.ReadFrom(args[0]);
    var cfg = ebnf.ToCfg();
    cfg.PrepareLL1();
    var lexer = ebnf.ToLexer(cfg);

    // Per the original author's note, the tests won't work without ShowHidden enabled.
    var debugParser = new DebugLL1Parser(cfg, null);
    debugParser.ShowHidden = true;
    var tableParser = cfg.ToLL1Parser(null);
    tableParser.ShowHidden = true;

    var anyFailed = false;
    for (var i = 1; i < args.Length; ++i)
    {
        Console.WriteLine("For \"{0}\"...", args[i]);

        string input;
        using (var reader = File.OpenText(args[i]))
        {
            input = reader.ReadToEnd();
        }

        // Both parsers are restarted on the same tokenizer instance.
        var tokenizer = new DebugTokenizer(cfg, lexer, input);

        debugParser.Restart(tokenizer);
        var debugPassed = _TestParser(debugParser, input);
        Console.WriteLine("Debug Test {0}", debugPassed ? "passed" : "failed");

        tableParser.Restart(tokenizer);
        var tablePassed = _TestParser(tableParser, input);
        Console.WriteLine("Table Test {0}", tablePassed ? "passed" : "failed");

        if (!debugPassed || !tablePassed)
        {
            anyFailed = true;
        }
    }

    return anyFailed ? 1 : 0;
}
/// <summary>
/// Builds the state machine for this two-operand expression as an alternation
/// (<c>CharFA.Or</c>) of the machines of <c>Left</c> and <c>Right</c>.
/// </summary>
/// <param name="parent">The owning document, used to look up the accepting symbol; may be null, in which case the symbol is the empty string.</param>
/// <param name="cfg">The grammar, forwarded unchanged to the operands.</param>
/// <returns>
/// Null when both operands are missing; an optional machine over the single operand
/// when only one is present; otherwise the alternation of both.
/// </returns>
public override CharFA ToFA(EbnfDocument parent, Cfg cfg)
{
    // The symbol lookup happens up front, even if the method ends up returning null.
    var symbol = "";
    if (null != parent)
    {
        symbol = parent.GetContainingIdForExpression(this);
    }

    if (null == Right && null == Left)
    {
        return null;
    }

    if (null == Right || null == Left)
    {
        // A missing side is treated as "nothing", which makes the other side optional.
        var only = (null == Right) ? Left : Right;
        return CharFA.Optional(only.ToFA(parent, cfg), symbol);
    }

    var alternatives = new CharFA[] { Left.ToFA(parent, cfg), Right.ToFA(parent, cfg) };
    return CharFA.Or(alternatives, symbol);
}
/// <summary>
/// Returns the disjunctions of this two-operand expression: every disjunction of
/// <c>Left</c> followed by every disjunction of <c>Right</c>, without duplicates.
/// </summary>
/// <param name="parent">Forwarded unchanged to the operands.</param>
/// <param name="cfg">Forwarded unchanged to the operands.</param>
/// <returns>
/// An empty list when both operands are missing; copies of the single operand's
/// disjunctions when only one is present; otherwise the de-duplicated pairwise combinations.
/// </returns>
public override IList<IList<string>> ToDisjunctions(EbnfDocument parent, Cfg cfg)
{
    var result = new List<IList<string>>();

    if (null == Right || null == Left)
    {
        var only = (null == Right) ? Left : Right;
        if (null == only)
        {
            return result;
        }

        // A single operand: hand back copies so callers cannot alias the operand's lists.
        foreach (var sequence in only.ToDisjunctions(parent, cfg))
        {
            result.Add(new List<string>(sequence));
        }

        return result;
    }

    foreach (var head in Left.ToDisjunctions(parent, cfg))
    {
        // Right is deliberately queried again for every left-hand alternative, exactly
        // as the code has always done; do not hoist without checking for side effects.
        foreach (var tail in Right.ToDisjunctions(parent, cfg))
        {
            var combined = new List<string>(head);
            combined.AddRange(tail);

            var duplicate = result.Contains(combined, OrderedCollectionEqualityComparer<string>.Default);
            if (!duplicate)
            {
                result.Add(combined);
            }
        }
    }

    return result;
}
/// <summary>
/// Usage: lltree $grammarfile $inputfile
/// </summary>
/// <param name="args">The grammar file and the input file to parse</param>
/// <returns>0 when the input was parsed; 1 on bad usage or when any error-level message was reported.</returns>
static int Main(string[] args)
{
    if (2 != args.Length)
    {
        _PrintUsage();
        return 1;
    }

    // Read the EBNF document from the grammar file.
    var ebnf = EbnfDocument.ReadFrom(args[0]);
    var hasErrors = false;

    // Validate the document; only error-level messages are printed (to stderr).
    foreach (var msg in ebnf.Validate(false))
    {
        if (EbnfErrorLevel.Error != msg.ErrorLevel)
        {
            continue;
        }

        hasErrors = true;
        Console.Error.WriteLine(msg);
    }

    foreach (var msg in ebnf.Prepare(false))
    {
        if (EbnfErrorLevel.Error != msg.ErrorLevel)
        {
            continue;
        }

        hasErrors = true;
        Console.Error.WriteLine(msg);
    }

    // Even if there were errors we keep going, so that CFG-level problems are reported too.
    var cfg = ebnf.ToCfg();

    // A CFG has to be prepared before an LL(1) parser can use it. That means removing
    // left recursion and factoring out first-first and first-follows conflicts where
    // possible. Any errors encountered while doing so are printed here.
    foreach (var msg in cfg.PrepareLL1(false))
    {
        if (CfgErrorLevel.Error != msg.ErrorLevel)
        {
            continue;
        }

        hasErrors = true;
        Console.Error.WriteLine(msg);
    }

    if (hasErrors)
    {
        return 1;
    }

    // The tokenizer is created from the EBNF document because it has the terminal
    // definitions, unlike the CFG,
    // see https://www.codeproject.com/Articles/5162249/How-to-Make-an-LL-1-Parser-Lesson-1
    // FileReaderEnumerable takes a filename and exposes its contents as IEnumerable<char>,
    // which is what tokenizers expect (typically a string or char array).
    var tokenizer = ebnf.ToTokenizer(cfg, new FileReaderEnumerable(args[1]));

    // The parser *might* return multiple parse trees in some cases, so keep calling
    // ParseSubtree() until the end of the document. Each returned ParseNode tree is
    // written to the console via an implicit call to its ToString method.
    using (var parser = cfg.ToLL1Parser(tokenizer))
    {
        while (ParserNodeType.EndDocument != parser.NodeType)
        {
            Console.WriteLine(parser.ParseSubtree());
        }
    }

    return 0;
}
/// <summary>
/// Builds the state machine that matches this expression's <c>Value</c> via <c>CharFA.Literal</c>.
/// </summary>
/// <param name="parent">The owning document, used to look up this expression's id as the accepting symbol; may be null, in which case the symbol is the empty string.</param>
/// <param name="cfg">Not used by this implementation.</param>
/// <returns>The literal machine.</returns>
public override CharFA ToFA(EbnfDocument parent, Cfg cfg)
{
    var literal = Value;

    // Note: this uses GetIdForExpression, not GetContainingIdForExpression.
    var symbol = (null == parent) ? "" : parent.GetIdForExpression(this);

    return CharFA.Literal(literal, symbol);
}
/// <summary>
/// Converts this expression into a list of disjunctions, each of which is itself a
/// list of strings.
/// </summary>
/// <remarks>
/// NOTE(review): presumably each inner list is one alternative sequence of symbol
/// names, and an empty inner list stands for "nothing" - confirm against the overrides.
/// </remarks>
/// <param name="parent">The document the expression belongs to.</param>
/// <param name="cfg">The grammar being built or consulted during the conversion.</param>
/// <returns>The disjunctions for this expression.</returns>
public abstract IList <IList <string> > ToDisjunctions(EbnfDocument parent, Cfg cfg);
/// <summary>
/// Builds the CodeDOM declaration of a tokenizer class that derives from
/// <c>TableTokenizer</c> and embeds the lexer's DFA table, the symbol table and the
/// block-end table as static fields.
/// </summary>
/// <remarks>
/// Members are emitted in this order: one public <c>int</c> constant per terminal,
/// then <c>_Symbols</c>, <c>_BlockEnds</c>, <c>_DfaTable</c>, and finally a public
/// constructor taking the input characters and forwarding the three tables to the base class.
/// </remarks>
/// <param name="ebnf">The document the lexer is created from.</param>
/// <param name="cfg">The grammar supplying symbols, symbol ids and the <c>blockEnd</c> attributes.</param>
/// <param name="name">The name of the generated class.</param>
/// <returns>The type declaration, ready to be added to a namespace or generated on its own.</returns>
static CodeTypeDeclaration _CreateTokenizerClass(EbnfDocument ebnf, Cfg cfg, string name)
{
    var lexer = ebnf.ToLexer(cfg);

    var symbols = new List<string>();
    cfg.FillSymbols(symbols);

    // Same positions as the symbol list, with a null hole wherever the symbol is a non-terminal.
    var terminals = new List<string>(symbols.Count);
    foreach (var symbol in symbols)
    {
        terminals.Add(cfg.IsNonTerminal(symbol) ? null : symbol);
    }

    // Each symbol is mapped to its position in the symbol list.
    var symbolIndexes = new Dictionary<string, int>();
    for (var i = 0; i < symbols.Count; ++i)
    {
        symbolIndexes.Add(symbols[i], i);
    }

    // One entry per symbol; null where the symbol has no string-valued "blockEnd" attribute.
    var blockEnds = new string[symbols.Count];
    for (var i = 0; i < blockEnds.Length; ++i)
    {
        blockEnds[i] = cfg.AttributeSets.GetAttribute(symbols[i], "blockEnd", null) as string;
    }

    var dfaTable = lexer.ToDfaTable(symbolIndexes);

    var result = new CodeTypeDeclaration
    {
        Name = name,
        Attributes = MemberAttributes.FamilyOrAssembly
    };
    result.BaseTypes.Add(typeof(TableTokenizer));

    // A named constant for every terminal.
    // NOTE(review): the constant's value comes from cfg.GetIdOfSymbol, while the DFA
    // table was built from list positions - presumably these agree; confirm.
    foreach (var terminal in terminals)
    {
        if (null == terminal)
        {
            continue;
        }

        // Replace characters that cannot appear in an identifier.
        var identifier = terminal;
        foreach (var unsafeText in new string[] { "#", "'", "<", ">" })
        {
            identifier = identifier.Replace(unsafeText, "_");
        }

        result.Members.Add(new CodeMemberField
        {
            Attributes = MemberAttributes.Const | MemberAttributes.Public,
            Name = identifier,
            Type = new CodeTypeReference(typeof(int)),
            InitExpression = CodeDomUtility.Serialize(cfg.GetIdOfSymbol(terminal))
        });
    }

    result.Members.Add(new CodeMemberField
    {
        Name = "_Symbols",
        Type = new CodeTypeReference(typeof(string[])),
        Attributes = MemberAttributes.Static,
        InitExpression = CodeDomUtility.Serialize(terminals.ToArray())
    });

    result.Members.Add(new CodeMemberField
    {
        Name = "_BlockEnds",
        Type = new CodeTypeReference(typeof(string[])),
        Attributes = MemberAttributes.Static,
        InitExpression = CodeDomUtility.Serialize(blockEnds)
    });

    result.Members.Add(new CodeMemberField
    {
        Name = "_DfaTable",
        Type = new CodeTypeReference(typeof(CharDfaEntry[])),
        Attributes = MemberAttributes.Static,
        InitExpression = CodeDomUtility.Serialize(dfaTable)
    });

    // public Name(IEnumerable<char> input) : base(Name._DfaTable, Name._Symbols, Name._BlockEnds, input)
    var constructor = new CodeConstructor { Attributes = MemberAttributes.Public };
    constructor.Parameters.Add(new CodeParameterDeclarationExpression(typeof(IEnumerable<char>), "input"));
    foreach (var tableField in new string[] { "_DfaTable", "_Symbols", "_BlockEnds" })
    {
        constructor.BaseConstructorArgs.Add(
            new CodeFieldReferenceExpression(new CodeTypeReferenceExpression(result.Name), tableField));
    }
    constructor.BaseConstructorArgs.Add(new CodeArgumentReferenceExpression("input"));
    result.Members.Add(constructor);

    return result;
}
/// <summary>
/// Command-line entry point of the generator: reads an EBNF grammar, validates it,
/// prepares it for LL(1) and, when no error was reported, writes the generated
/// tokenizer and parser classes.
/// </summary>
/// <remarks>
/// Arguments: an optional grammar file (first position) and an optional output file
/// (second position), followed by the options <c>/language &lt;name&gt;</c> and
/// <c>/namespace &lt;name&gt;</c>. Positional arguments must come before any option.
/// <c>--help</c>, <c>/?</c> and <c>/help</c> print usage. Without a grammar file the
/// grammar is read from standard input; without an output file the code is written to
/// standard output. All diagnostics and the prepared grammar are written to standard error.
/// </remarks>
/// <param name="args">The command-line arguments.</param>
/// <returns>0 on success or when help was requested; 1 on bad usage or when any error-level message was reported.</returns>
static int Main(string[] args)
{
    string grammarFile = null;
    string outFile = null;
    string @namespace = null;
    string language = null; // when left null it is derived from the output file extension below
    var optIndex = -1;

    for (var i = 0; i < args.Length; ++i)
    {
        if ("--help" == args[i] || "/?" == args[i] || "/help" == args[i])
        {
            _PrintUsage();
            return 0;
        }

        if (args[i].StartsWith("/", StringComparison.Ordinal))
        {
            optIndex = i;

            // Every option takes a value, so an option cannot be the last argument.
            if (i == args.Length - 1)
            {
                _PrintUsage();
                return 1;
            }

            switch (args[i])
            {
                case "/language":
                    ++i;
                    language = args[i];
                    break;
                case "/namespace":
                    ++i;
                    @namespace = args[i];
                    break;
                default:
                    _PrintUsage();
                    return 1;
            }
        }
        else
        {
            // Positional arguments are not allowed once an option has been seen.
            if (-1 != optIndex)
            {
                _PrintUsage();
                return 1;
            }

            if (0 == i)
            {
                grammarFile = args[i];
            }
            else if (1 == i)
            {
                outFile = args[i];
            }
            else
            {
                _PrintUsage();
                return 1;
            }
        }
    }

    string inp;
    if (string.IsNullOrEmpty(grammarFile))
    {
        inp = Console.In.ReadToEnd();
    }
    else
    {
        using (var sr = File.OpenText(grammarFile))
        {
            inp = sr.ReadToEnd();
        }
    }

    var ebnf = EbnfDocument.Parse(inp);
    var hasErrors = false;

    foreach (var msg in ebnf.Validate(false))
    {
        Console.Error.WriteLine(string.Concat("EBNF ", msg.ToString()));
        if (EbnfErrorLevel.Error == msg.ErrorLevel)
        {
            hasErrors = true;
        }
    }

    var cfg = ebnf.ToCfg();
    foreach (var msg in cfg.PrepareLL1(false))
    {
        Console.Error.WriteLine(string.Concat("CFG ", msg.ToString()));
        if (CfgErrorLevel.Error == msg.ErrorLevel)
        {
            hasErrors = true;
        }
    }

    Console.Error.WriteLine();
    Console.Error.WriteLine(cfg);

    if (hasErrors)
    {
        return 1;
    }

    if (string.IsNullOrEmpty(outFile))
    {
        // The raw grammar text is passed along as the preamble argument.
        LLCodeGenerator.WriteParserAndGeneratorClassesTo(ebnf, cfg, @namespace, inp, cfg.StartSymbol, language, Console.Out);
        return 0;
    }

    if (null == language)
    {
        // Path.GetExtension returns "" when the file name has no extension, and
        // "".Substring(1) throws ArgumentOutOfRangeException. Only strip the leading
        // dot when there is something after it; otherwise leave the language unset
        // so the generator applies its default.
        var extension = Path.GetExtension(outFile);
        language = (extension.Length > 1) ? extension.Substring(1) : null;
    }

    using (var fw = new StreamWriter(File.Open(outFile, FileMode.OpenOrCreate)))
    {
        // Truncate any previous content before writing.
        fw.BaseStream.SetLength(0);
        LLCodeGenerator.WriteParserAndGeneratorClassesTo(ebnf, cfg, @namespace, inp, Path.GetFileNameWithoutExtension(outFile), language, fw);
    }

    return 0;
}
/// <summary>
/// Replaces this repeating expression with a freshly named list symbol: it registers
/// the symbol (marked <c>collapsed</c>) and its rules in <paramref name="cfg"/> and
/// returns a reference to it.
/// </summary>
/// <remarks>
/// The rules added are the disjunctions of <c>(list Expression) | Expression</c>.
/// This method has side effects on <paramref name="cfg"/> each time it is called:
/// a new unique id is requested and an attribute set is added for it.
/// </remarks>
/// <param name="parent">Forwarded to the synthesized expression; also gates the "list" naming below.</param>
/// <param name="cfg">The grammar that receives the new symbol's attributes and rules.</param>
/// <returns>
/// One disjunction holding the list symbol; when <c>IsOptional</c> is set, an
/// additional empty disjunction.
/// </returns>
public override IList<IList<string>> ToDisjunctions(EbnfDocument parent, Cfg cfg)
{
    // Pick a readable base name for the list symbol.
    string baseId = null;

    // "<symbol>list" when the repeated expression is a plain reference (and a parent is given).
    var direct = Expression as EbnfRefExpression;
    if (null != parent && null != direct)
    {
        baseId = string.Concat(direct.Symbol, "list");
    }

    // "<symbol>listtail" when it is a concatenation whose right side is a reference.
    if (string.IsNullOrEmpty(baseId))
    {
        var concat = Expression as EbnfConcatExpression;
        var tail = (null == concat) ? null : (concat.Right as EbnfRefExpression);
        if (null != tail)
        {
            baseId = string.Concat(tail.Symbol, "listtail");
        }
    }

    if (string.IsNullOrEmpty(baseId))
    {
        baseId = "implicitlist";
    }

    var listId = cfg.GetUniqueId(baseId);

    var attributes = new AttributeSet();
    attributes.Add("collapsed", true);
    cfg.AttributeSets.Add(listId, attributes);

    // list -> list Expression | Expression
    var recursive = new EbnfConcatExpression(new EbnfRefExpression(listId), Expression);
    var definition = new EbnfOrExpression(recursive, Expression);

    foreach (var alternative in definition.ToDisjunctions(parent, cfg))
    {
        var rule = new CfgRule();
        rule.Left = listId;

        foreach (var symbol in alternative)
        {
            // Null entries are dropped only once the rule already has more than one symbol.
            if (null != symbol || rule.Right.Count <= 1)
            {
                rule.Right.Add(symbol);
            }
        }

        if (!cfg.Rules.Contains(rule))
        {
            cfg.Rules.Add(rule);
        }
    }

    var result = new List<IList<string>>();
    result.Add(new List<string>(new string[] { listId }));
    if (IsOptional)
    {
        // The empty alternative lets the whole list be skipped.
        result.Add(new List<string>());
    }

    return result;
}
/// <summary>
/// Single-file-generator entry point: reads the EBNF grammar at
/// <paramref name="wszInputFilePath"/>, reports validation and LL(1) preparation
/// messages through <paramref name="pGenerateProgress"/>, and, when no error was
/// reported, returns the generated C# tokenizer and parser source.
/// </summary>
/// <remarks>
/// The grammar is read from the file path; <paramref name="bstrInputFileContents"/> is
/// not consulted. When an unexpected exception occurs, the output is a comment
/// containing the exception message.
/// </remarks>
/// <param name="wszInputFilePath">Path of the grammar file; its file name (without extension) names the generated classes.</param>
/// <param name="bstrInputFileContents">Unused.</param>
/// <param name="wszDefaultNamespace">Namespace for the generated code.</param>
/// <param name="rgbOutputFileContents">Element 0 receives a CoTaskMem buffer holding the output bytes, when there is output.</param>
/// <param name="pcbOutput">The number of bytes written to the output buffer; 0 when nothing was generated.</param>
/// <param name="pGenerateProgress">Receives progress notifications, errors and warnings.</param>
/// <returns>Always <c>VSConstants.S_OK</c>.</returns>
public int Generate(string wszInputFilePath, string bstrInputFileContents, string wszDefaultNamespace, IntPtr[] rgbOutputFileContents, out uint pcbOutput, IVsGeneratorProgress pGenerateProgress)
{
    pcbOutput = 0;
    try
    {
        ThreadHelper.ThrowIfNotOnUIThread();
        pGenerateProgress.Progress(0, 4);
        var hasErrors = false;
        using (var stm = new MemoryStream())
        {
            EbnfDocument ebnf = null;
            try
            {
                ebnf = EbnfDocument.ReadFrom(wszInputFilePath);
            }
            catch (ExpectingException ee)
            {
                hasErrors = true;
                ThreadHelper.ThrowIfNotOnUIThread();
                pGenerateProgress.GeneratorError(0, 0, "Error parsing the EBNF: " + ee.Message, (uint)ee.Line - 1, (uint)ee.Column - 1);
            }

            ThreadHelper.ThrowIfNotOnUIThread();
            pGenerateProgress.Progress(1, 4);

            // If parsing failed there is no document. The parse error has already been
            // reported above, so skip validation and generation instead of dereferencing
            // null (which used to surface as a NullReferenceException message in the output).
            if (null != ebnf)
            {
                foreach (var msg in ebnf.Validate(false))
                {
                    switch (msg.ErrorLevel)
                    {
                        case EbnfErrorLevel.Error:
                            ThreadHelper.ThrowIfNotOnUIThread();
                            pGenerateProgress.GeneratorError(0, 0, "EBNF " + msg.Message, (uint)msg.Line - 1, (uint)msg.Column - 1);
                            hasErrors = true;
                            break;
                        case EbnfErrorLevel.Warning:
                            ThreadHelper.ThrowIfNotOnUIThread();
                            // First argument 1 reports the message as a warning rather than an error.
                            pGenerateProgress.GeneratorError(1, 0, "EBNF " + msg.Message, (uint)msg.Line - 1, (uint)msg.Column - 1);
                            break;
                    }
                }

                ThreadHelper.ThrowIfNotOnUIThread();
                pGenerateProgress.Progress(3, 4);

                var cfg = ebnf.ToCfg();
                foreach (var msg in cfg.PrepareLL1(false))
                {
                    switch (msg.ErrorLevel)
                    {
                        case CfgErrorLevel.Error:
                            ThreadHelper.ThrowIfNotOnUIThread();
                            pGenerateProgress.GeneratorError(0, 0, "CFG " + msg.Message, 0, 0);
                            hasErrors = true;
                            break;
                        case CfgErrorLevel.Warning:
                            ThreadHelper.ThrowIfNotOnUIThread();
                            pGenerateProgress.GeneratorError(1, 0, "CFG " + msg.Message, 0, 0);
                            break;
                    }
                }

                if (!hasErrors)
                {
                    var sw = new StreamWriter(stm);
                    LLCodeGenerator.WriteParserAndGeneratorClassesTo(ebnf, cfg, wszDefaultNamespace, null, Path.GetFileNameWithoutExtension(wszInputFilePath), "cs", sw);
                    sw.Flush();

                    // Copy only the written portion of the stream's internal buffer.
                    int length = (int)stm.Length;
                    rgbOutputFileContents[0] = Marshal.AllocCoTaskMem(length);
                    Marshal.Copy(stm.GetBuffer(), 0, rgbOutputFileContents[0], length);
                    pcbOutput = (uint)length;
                }
            }

            ThreadHelper.ThrowIfNotOnUIThread();
            pGenerateProgress.Progress(4, 4);
        }
    }
    catch (Exception ex)
    {
        // Best effort: surface the failure as a comment in the generated file.
        string s = string.Concat("/* ", ex.Message, " */");
        byte[] b = Encoding.UTF8.GetBytes(s);
        int length = b.Length;
        rgbOutputFileContents[0] = Marshal.AllocCoTaskMem(length);
        Marshal.Copy(b, 0, rgbOutputFileContents[0], length);
        pcbOutput = (uint)length;
    }

    return VSConstants.S_OK;
}
/// <summary>
/// Converts this expression into a <c>CharFA</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the result is the finite automaton that matches the text
/// described by this expression, and null means there was nothing to convert - confirm
/// against the overrides.
/// </remarks>
/// <param name="parent">The document the expression belongs to.</param>
/// <param name="cfg">The grammar associated with the document.</param>
/// <returns>The automaton for this expression.</returns>
public abstract CharFA ToFA(EbnfDocument parent, Cfg cfg);