public static Dictionary<string, string> RemoveUselessParserProductions(int pos, Document document)
{
    var result = new Dictionary<string, string>();

    // Check if lexer grammar.
    AntlrGrammarDetails pd_parser = ParserDetailsFactory.Create(document) as AntlrGrammarDetails;
    ExtractGrammarType lp = new ExtractGrammarType();
    ParseTreeWalker.Default.Walk(lp, pd_parser.ParseTree);
    var is_lexer = lp.Type == ExtractGrammarType.GrammarType.Lexer;
    if (is_lexer)
    {
        // We don't consider lexer grammars.
        return result;
    }

    // Consider only the target grammar.
    Table table = new Table(pd_parser, document);
    table.ReadRules();
    table.FindPartitions();
    table.FindStartRules();

    // Collect the source spans of all unused parser rules.
    List<Pair<int, int>> deletions = new List<Pair<int, int>>();
    foreach (var r in table.rules)
    {
        if (r.is_parser_rule && !r.is_used)
        {
            deletions.Add(new Pair<int, int>(r.start_index, r.end_index));
        }
    }
    deletions = deletions.OrderBy(p => p.a).ThenBy(p => p.b).ToList();

    // Copy the old code, skipping over the deleted spans.
    StringBuilder sb = new StringBuilder();
    int previous = 0;
    string old_code = document.Code;
    foreach (var l in deletions)
    {
        int index_start = l.a;
        int len = l.b - l.a;
        string pre = old_code.Substring(previous, index_start - previous);
        sb.Append(pre);
        previous = index_start + len;
    }
    string rest = old_code.Substring(previous);
    sb.Append(rest);
    string new_code = sb.ToString();
    result.Add(document.FullPath, new_code);
    return result;
}
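// Example (illustrative): in
//     grammar G;
//     s : e ;
//     e : INT ;
//     dead : ID ;
//     INT : [0-9]+ ;
//     ID : [a-z]+ ;
// the parser rule "dead" is referenced from no start rule and is deleted;
// lexer rules are never removed.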
public static Dictionary<string, string> SplitCombineGrammars(int pos, Document document, bool split)
{
    var result = new Dictionary<string, string>();

    // Check the grammar type: splitting requires a combined grammar,
    // combining requires a parser grammar.
    AntlrGrammarDetails pd_parser = ParserDetailsFactory.Create(document) as AntlrGrammarDetails;
    ExtractGrammarType lp = new ExtractGrammarType();
    ParseTreeWalker.Default.Walk(lp, pd_parser.ParseTree);
    if (split && lp.Type != ExtractGrammarType.GrammarType.Combined)
    {
        return null;
    }
    if (!split && lp.Type != ExtractGrammarType.GrammarType.Parser)
    {
        return null;
    }

    Table table = new Table(pd_parser, document);
    table.ReadRules();
    table.FindPartitions();
    table.FindStartRules();

    string old_code = document.Code;
    if (split)
    {
        // Create a parser and a lexer grammar.
        StringBuilder sb_parser = new StringBuilder();
        StringBuilder sb_lexer = new StringBuilder();
        var root = pd_parser.ParseTree as ANTLRv4Parser.GrammarSpecContext;
        if (root == null)
        {
            return null;
        }
        var grammar_type_tree = root.grammarType();
        var id = root.id();
        var semi_tree = root.SEMI();
        var rules_tree = root.rules();

        // Copy everything before the grammar declaration, then emit new
        // "parser grammar" / "lexer grammar" headers.
        string pre = old_code.Substring(0, pd_parser.TokStream.Get(grammar_type_tree.SourceInterval.a).StartIndex);
        sb_parser.Append(pre);
        sb_lexer.Append(pre);
        sb_parser.Append("parser grammar " + id.GetText() + "Parser;" + Environment.NewLine);
        sb_lexer.Append("lexer grammar " + id.GetText() + "Lexer;" + Environment.NewLine);

        // Copy the text between the grammar declaration and the first rule
        // (options, named actions, etc.) into both outputs.
        int x1 = pd_parser.TokStream.Get(semi_tree.SourceInterval.b).StopIndex + 1;
        int x2 = pd_parser.TokStream.Get(rules_tree.SourceInterval.a).StartIndex;
        string n1 = old_code.Substring(x1, x2 - x1);
        sb_parser.Append(n1);
        sb_lexer.Append(n1);

        // Partition the rules: parser rules go to the parser grammar,
        // lexer rules to the lexer grammar.
        int end = 0;
        for (int i = 0; i < table.rules.Count; ++i)
        {
            var r = table.rules[i];
            string n2 = old_code.Substring(r.start_index, r.end_index - r.start_index);
            if (r.is_parser_rule)
            {
                sb_parser.Append(n2);
            }
            else
            {
                sb_lexer.Append(n2);
            }
            end = r.end_index + 1;
        }
        if (end < old_code.Length)
        {
            string rest = old_code.Substring(end);
            sb_parser.Append(rest);
            sb_lexer.Append(rest);
        }

        string g4_file_path = document.FullPath;
        string current_dir = Path.GetDirectoryName(g4_file_path);
        if (current_dir == null)
        {
            return null;
        }
        string orig_name = Path.GetFileNameWithoutExtension(g4_file_path);
        string new_code_parser = sb_parser.ToString();
        string new_parser_ffn = current_dir + Path.DirectorySeparatorChar + orig_name + "Parser.g4";
        string new_lexer_ffn = current_dir + Path.DirectorySeparatorChar + orig_name + "Lexer.g4";
        string new_code_lexer = sb_lexer.ToString();
        result.Add(new_parser_ffn, new_code_parser);
        result.Add(new_lexer_ffn, new_code_lexer);
        // A null value marks the original combined grammar for deletion.
        result.Add(g4_file_path, null);
    }
    else
    {
        // Combine: locate and parse the companion lexer grammar.
        // Compute the closure of grammars that depend on this one. The
        // reassignment inside the loop builds a new set, so the enumeration
        // over the old set remains valid.
        HashSet<string> read_files = new HashSet<string>();
        read_files.Add(document.FullPath);
        for (; ; )
        {
            int before_count = read_files.Count;
            foreach (var f in read_files)
            {
                var additional = AntlrGrammarDetails._dependent_grammars.Where(
                    t => t.Value.Contains(f)).Select(
                    t => t.Key).ToList();
                read_files = read_files.Union(additional).ToHashSet();
            }
            int after_count = read_files.Count;
            if (after_count == before_count)
            {
                break;
            }
        }
        List<AntlrGrammarDetails> lexers = new List<AntlrGrammarDetails>();
        foreach (string f in read_files)
        {
            Workspaces.Document lexer_document = Workspaces.Workspace.Instance.FindDocument(f);
            if (lexer_document == null)
            {
                continue;
            }
            AntlrGrammarDetails x = ParserDetailsFactory.Create(lexer_document) as AntlrGrammarDetails;
            lexers.Add(x);
        }
        // Expect exactly two grammars: this parser grammar and its lexer.
        if (lexers.Count != 2)
        {
            return null;
        }
        // NB: assumes the dependent lexer grammar is the second entry.
        var pd_lexer = lexers[1];
        Workspaces.Document ldocument = Workspaces.Workspace.Instance.FindDocument(pd_lexer.FullFileName);
        Table lexer_table = new Table(pd_lexer, ldocument);
        lexer_table.ReadRules();
        lexer_table.FindPartitions();
        lexer_table.FindStartRules();

        // Create a combined grammar: header, then parser rules, then lexer rules.
        StringBuilder sb_parser = new StringBuilder();
        var root = pd_parser.ParseTree as ANTLRv4Parser.GrammarSpecContext;
        if (root == null)
        {
            return null;
        }
        var grammar_type_tree = root.grammarType();
        var id = root.id();
        var semi_tree = root.SEMI();
        var rules_tree = root.rules();
        string pre = old_code.Substring(0, pd_parser.TokStream.Get(grammar_type_tree.SourceInterval.a).StartIndex);
        sb_parser.Append(pre);
        sb_parser.Append("grammar " + id.GetText().Replace("Parser", "") + ";" + Environment.NewLine);
        int x1 = pd_parser.TokStream.Get(semi_tree.SourceInterval.b).StopIndex + 1;
        int x2 = pd_parser.TokStream.Get(rules_tree.SourceInterval.a).StartIndex;
        string n1 = old_code.Substring(x1, x2 - x1);
        sb_parser.Append(n1);

        // Copy parser rules from the parser grammar.
        int end = 0;
        for (int i = 0; i < table.rules.Count; ++i)
        {
            var r = table.rules[i];
            if (r.is_parser_rule)
            {
                string n2 = old_code.Substring(r.start_index, r.end_index - r.start_index);
                sb_parser.Append(n2);
            }
            end = r.end_index + 1;
        }
        if (end < old_code.Length)
        {
            string rest = old_code.Substring(end);
            sb_parser.Append(rest);
        }

        // Copy lexer rules from the lexer grammar.
        end = 0;
        var lexer_old_code = ldocument.Code;
        for (int i = 0; i < lexer_table.rules.Count; ++i)
        {
            var r = lexer_table.rules[i];
            if (!r.is_parser_rule)
            {
                string n2 = lexer_old_code.Substring(r.start_index, r.end_index - r.start_index);
                sb_parser.Append(n2);
            }
            end = r.end_index + 1;
        }
        if (end < lexer_old_code.Length)
        {
            string rest = lexer_old_code.Substring(end);
            sb_parser.Append(rest);
        }

        string g4_file_path = document.FullPath;
        string current_dir = Path.GetDirectoryName(g4_file_path);
        if (current_dir == null)
        {
            return null;
        }
        string orig_name = Path.GetFileName(g4_file_path);
        var new_name = orig_name.Replace("Parser.g4", "");
        string new_code_parser = sb_parser.ToString();
        string new_parser_ffn = current_dir + Path.DirectorySeparatorChar + new_name + ".g4";
        result.Add(new_parser_ffn, new_code_parser);
        // Null values mark the original parser and lexer grammars for deletion.
        result.Add(pd_parser.FullFileName, null);
        result.Add(pd_lexer.FullFileName, null);
    }
    return result;
}
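// Sketch of how a caller might apply the result map (hypothetical helper,
// not part of the original API): a null value means "delete this file",
// the convention used by SplitCombineGrammars above.
public static void ApplyChanges(Dictionary<string, string> changes)
{
    foreach (var kvp in changes)
    {
        if (kvp.Value == null)
        {
            // The refactoring replaced this grammar file with new files.
            System.IO.File.Delete(kvp.Key);
        }
        else
        {
            System.IO.File.WriteAllText(kvp.Key, kvp.Value);
        }
    }
}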
public static Dictionary<string, string> ReorderParserRules(int pos, Document document, LspAntlr.ReorderType type)
{
    var result = new Dictionary<string, string>();

    // Check if lexer grammar.
    AntlrGrammarDetails pd_parser = ParserDetailsFactory.Create(document) as AntlrGrammarDetails;
    ExtractGrammarType lp = new ExtractGrammarType();
    ParseTreeWalker.Default.Walk(lp, pd_parser.ParseTree);
    var is_lexer = lp.Type == ExtractGrammarType.GrammarType.Lexer;
    if (is_lexer)
    {
        return result;
    }

    Table table = new Table(pd_parser, document);
    table.ReadRules();
    table.FindPartitions();
    table.FindStartRules();

    // Build the reference graph over parser rules: an edge A -> B means
    // rule A mentions rule B on its right-hand side. (DFS and BFS share
    // this construction.)
    Digraph<string> BuildGraph()
    {
        Digraph<string> graph = new Digraph<string>();
        foreach (var r in table.rules)
        {
            if (!r.is_parser_rule)
            {
                continue;
            }
            graph.AddVertex(r.LHS);
        }
        foreach (var r in table.rules)
        {
            if (!r.is_parser_rule)
            {
                continue;
            }
            foreach (var rhs in r.RHS)
            {
                var sym = table.rules.Where(t => t.LHS == rhs).FirstOrDefault();
                // Skip RHS symbols that are tokens or not defined here.
                if (sym == null || !sym.is_parser_rule)
                {
                    continue;
                }
                graph.AddEdge(new DirectedEdge<string>(r.LHS, rhs));
            }
        }
        return graph;
    }

    List<string> FindStarts()
    {
        List<string> starts = new List<string>();
        foreach (var r in table.rules)
        {
            if (r.is_parser_rule && r.is_start)
            {
                starts.Add(r.LHS);
            }
        }
        return starts;
    }

    // Find the new order of the parser rules.
    string old_code = document.Code;
    List<string> ordered;
    if (type == LspAntlr.ReorderType.DFS)
    {
        var sort = new DepthFirstOrder<string, DirectedEdge<string>>(BuildGraph(), FindStarts());
        ordered = sort.ToList();
    }
    else if (type == LspAntlr.ReorderType.BFS)
    {
        var sort = new BreadthFirstOrder<string, DirectedEdge<string>>(BuildGraph(), FindStarts());
        ordered = sort.ToList();
    }
    else if (type == LspAntlr.ReorderType.Alphabetically)
    {
        ordered = table.rules
            .Where(r => r.is_parser_rule)
            .Select(r => r.LHS)
            .OrderBy(r => r).ToList();
    }
    else
    {
        return result;
    }
    List<Pair<int, int>> reorder = new List<Pair<int, int>>();
    foreach (var s in ordered)
    {
        var row = table.rules[table.nt_to_index[s]];
        reorder.Add(new Pair<int, int>(row.start_index, row.end_index));
    }

    // Emit everything before the first rule, then the parser rules in
    // their new order.
    StringBuilder sb = new StringBuilder();
    string pre = old_code.Substring(0, table.rules[0].start_index);
    sb.Append(pre);
    foreach (var l in reorder)
    {
        int index_start = l.a;
        int len = l.b - l.a;
        string add = old_code.Substring(index_start, len);
        sb.Append(add);
    }

    // Now add all non-parser rules, in their original order.
    foreach (var r in table.rules)
    {
        if (r.is_parser_rule)
        {
            continue;
        }
        int index_start = r.start_index;
        int len = r.end_index - r.start_index;
        string add = old_code.Substring(index_start, len);
        sb.Append(add);
    }
    string new_code = sb.ToString();
    result.Add(document.FullPath, new_code);
    return result;
}
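// Example (illustrative): with type == LspAntlr.ReorderType.Alphabetically,
// the parser rules of
//     s : e ;  e : a ;  a : INT ;
// are emitted as
//     a : INT ;  e : a ;  s : e ;
// followed by the lexer rules in their original order. DFS and BFS instead
// order rules by a depth-first or breadth-first traversal of the
// rule-reference graph starting from the start rules.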
public static Dictionary<string, string> MoveStartRuleToTop(int pos, Document document)
{
    var result = new Dictionary<string, string>();

    // Check if lexer grammar.
    AntlrGrammarDetails pd_parser = ParserDetailsFactory.Create(document) as AntlrGrammarDetails;
    ExtractGrammarType lp = new ExtractGrammarType();
    ParseTreeWalker.Default.Walk(lp, pd_parser.ParseTree);
    var is_lexer = lp.Type == ExtractGrammarType.GrammarType.Lexer;
    if (is_lexer)
    {
        // We don't consider lexer grammars.
        return result;
    }

    // Consider only the target grammar.
    Table table = new Table(pd_parser, document);
    table.ReadRules();
    table.FindPartitions();
    table.FindStartRules();

    // Collect the source spans of all start rules, in document order.
    string old_code = document.Code;
    List<Pair<int, int>> move = new List<Pair<int, int>>();
    foreach (var r in table.rules)
    {
        if (r.is_parser_rule && r.is_start)
        {
            move.Add(new Pair<int, int>(r.start_index, r.end_index));
        }
    }
    move = move.OrderBy(p => p.a).ThenBy(p => p.b).ToList();

    // The insertion point is the start of the first rule in the grammar.
    var find_first_rule = new FindFirstRule();
    ParseTreeWalker.Default.Walk(find_first_rule, pd_parser.ParseTree);
    var first_rule = find_first_rule.First;
    if (first_rule == null)
    {
        return result;
    }
    var insertion = first_rule.SourceInterval.a;
    var insertion_tok = pd_parser.TokStream.Get(insertion);
    var insertion_ind = insertion_tok.StartIndex;
    if (move.Count == 1 && move[0].a == insertion_ind)
    {
        // The single start rule is already at the top; nothing to do.
        return result;
    }

    // Emit the prefix, then the moved rules, then the rest of the code
    // with the moved spans cut out.
    StringBuilder sb = new StringBuilder();
    string prefix = old_code.Substring(0, insertion_ind);
    sb.Append(prefix);
    int previous = insertion_ind;
    foreach (var l in move)
    {
        int index_start = l.a;
        int len = l.b - l.a;
        string add = old_code.Substring(index_start, len);
        sb.Append(add);
    }
    foreach (var l in move)
    {
        int index_start = l.a;
        int len = l.b - l.a;
        string pre = old_code.Substring(previous, index_start - previous);
        sb.Append(pre);
        previous = index_start + len;
    }
    string rest = old_code.Substring(previous);
    sb.Append(rest);
    string new_code = sb.ToString();
    result.Add(document.FullPath, new_code);
    return result;
}
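// Example (illustrative): if "s" is the start rule in
//     grammar G;
//     e : INT ;
//     s : e EOF ;
//     INT : [0-9]+ ;
// the result places "s" before "e"; nothing is emitted twice because the
// moved spans are cut out of the remainder.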
public static Dictionary<string, string> ReplaceLiterals(int index, Document document)
{
    Dictionary<string, string> result = new Dictionary<string, string>();

    // Check that the initial file is a grammar.
    AntlrGrammarDetails pd_parser = ParserDetailsFactory.Create(document) as AntlrGrammarDetails;
    ExtractGrammarType egt = new ExtractGrammarType();
    ParseTreeWalker.Default.Walk(egt, pd_parser.ParseTree);
    var is_grammar = egt.Type == ExtractGrammarType.GrammarType.Parser
        || egt.Type == ExtractGrammarType.GrammarType.Combined
        || egt.Type == ExtractGrammarType.GrammarType.Lexer;
    if (!is_grammar)
    {
        return result;
    }

    // Find all other grammars by walking dependencies (import, vocab, file
    // names) in both directions until the set of files reaches a fixed point.
    HashSet<string> read_files = new HashSet<string>();
    read_files.Add(document.FullPath);
    Dictionary<Workspaces.Document, List<TerminalNodeImpl>> every_damn_literal =
        new Dictionary<Workspaces.Document, List<TerminalNodeImpl>>();
    for (; ; )
    {
        int before_count = read_files.Count;
        foreach (var f in read_files)
        {
            var additional = AntlrGrammarDetails._dependent_grammars.Where(
                t => t.Value.Contains(f)).Select(
                t => t.Key).ToList();
            read_files = read_files.Union(additional).ToHashSet();
        }
        foreach (var f in read_files)
        {
            var additional = AntlrGrammarDetails._dependent_grammars.Where(
                t => t.Key == f).Select(
                t => t.Value);
            foreach (var t in additional)
            {
                read_files = read_files.Union(t).ToHashSet();
            }
        }
        int after_count = read_files.Count;
        if (after_count == before_count)
        {
            break;
        }
    }

    // Find rewrite rules, i.e., string literal to symbol name.
    Dictionary<string, string> subs = new Dictionary<string, string>();
    foreach (string f in read_files)
    {
        Workspaces.Document whatever_document = Workspaces.Workspace.Instance.FindDocument(f);
        if (whatever_document == null)
        {
            continue;
        }
        AntlrGrammarDetails pd_whatever = ParserDetailsFactory.Create(whatever_document) as AntlrGrammarDetails;

        // Find literals in the grammar.
        LiteralsGrammar lp_whatever = new LiteralsGrammar(pd_whatever);
        ParseTreeWalker.Default.Walk(lp_whatever, pd_whatever.ParseTree);
        List<TerminalNodeImpl> list_literals = lp_whatever.Literals;
        every_damn_literal[whatever_document] = list_literals;
        foreach (var lexer_literal in list_literals)
        {
            var old_name = lexer_literal.GetText();
            // Given a candidate literal, walk up the tree to the enclosing
            // lexer rule. The literal defines a token name only when the
            // rule is a single alternative consisting of just that literal,
            // e.g.
            //
            //     NONASSOC : '%binary' ;
            //
            // which parses as
            //
            //     (ruleSpec
            //       (lexerRuleSpec
            //         TOKEN_REF ':'
            //         (lexerRuleBlock
            //           (lexerAltList
            //             (lexerAlt
            //               (lexerElements
            //                 (lexerElement
            //                   (lexerAtom
            //                     (terminal '%binary')))))))
            //         ';'))
            //
            // (OFF_CHANNEL whitespace tokens omitted). Make sure the parent
            // chain fits this structure.
            var p1 = lexer_literal.Parent;
            if (p1.ChildCount != 1 || !(p1 is ANTLRv4Parser.TerminalContext)) { continue; }
            var p2 = p1.Parent;
            if (p2.ChildCount != 1 || !(p2 is ANTLRv4Parser.LexerAtomContext)) { continue; }
            var p3 = p2.Parent;
            if (p3.ChildCount != 1 || !(p3 is ANTLRv4Parser.LexerElementContext)) { continue; }
            var p4 = p3.Parent;
            if (p4.ChildCount != 1 || !(p4 is ANTLRv4Parser.LexerElementsContext)) { continue; }
            var p5 = p4.Parent;
            if (p5.ChildCount != 1 || !(p5 is ANTLRv4Parser.LexerAltContext)) { continue; }
            var p6 = p5.Parent;
            if (p6.ChildCount != 1 || !(p6 is ANTLRv4Parser.LexerAltListContext)) { continue; }
            var p7 = p6.Parent;
            if (p7.ChildCount != 1 || !(p7 is ANTLRv4Parser.LexerRuleBlockContext)) { continue; }
            var p8 = p7.Parent;
            // The lexer rule spec has four children: name, ':', block, ';'.
            if (p8.ChildCount != 4 || !(p8 is ANTLRv4Parser.LexerRuleSpecContext)) { continue; }
            var alt = p8.GetChild(0);
            var new_name = alt.GetText();
            // Guard against the same literal being defined in more than one
            // grammar: the first definition wins.
            if (!subs.ContainsKey(old_name))
            {
                subs.Add(old_name, new_name);
            }
        }
    }

    // Find string literals in parser and combined grammars and substitute.
    Dictionary<TerminalNodeImpl, string> rewrites = new Dictionary<TerminalNodeImpl, string>();
    foreach (var pair in every_damn_literal)
    {
        var list_literals = pair.Value;
        foreach (var l in list_literals)
        {
            // Make sure this literal does not appear in a lexer rule.
            bool no = false;
            for (IRuleNode p = l.Parent; p != null; p = p.Parent)
            {
                if (p is ANTLRv4Parser.LexerRuleSpecContext)
                {
                    no = true;
                    break;
                }
            }
            if (no)
            {
                continue;
            }
            subs.TryGetValue(l.GetText(), out string re);
            if (re != null)
            {
                rewrites.Add(l, re);
            }
        }
    }

    // Apply the rewrites file by file, in token order.
    var files = rewrites.Select(r => r.Key.Payload.TokenSource.SourceName).OrderBy(q => q).Distinct();
    var documents = files.Select(f => Workspaces.Workspace.Instance.FindDocument(f)).ToList();
    foreach (Document f in documents)
    {
        string fn = f.FullPath;
        var per_file_changes = rewrites
            .Where(z => z.Key.Payload.TokenSource.SourceName == f.FullPath)
            .OrderBy(z => z.Key.Payload.TokenIndex).ToList();
        StringBuilder sb = new StringBuilder();
        int previous = 0;
        string code = f.Code;
        foreach (var l in per_file_changes)
        {
            int index_start = l.Key.Payload.StartIndex;
            int len = l.Key.Payload.Text.Length;
            string new_text = l.Value;
            string pre = code.Substring(previous, index_start - previous);
            sb.Append(pre);
            sb.Append(new_text);
            previous = index_start + len;
        }
        string rest = code.Substring(previous);
        sb.Append(rest);
        string new_code = sb.ToString();
        result.Add(fn, new_code);
    }
    return result;
}
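// Example (illustrative): given the lexer rule
//     SEMI : ';' ;
// every parser-side occurrence of the literal ';' is rewritten, so
//     stmt : expr ';' ;    becomes    stmt : expr SEMI ;
// Literals that appear inside lexer rules are left unchanged.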