/// <summary>
/// Either splits a combined grammar into a parser grammar and a lexer grammar
/// (<paramref name="split"/> is true), or merges a parser grammar with its lexer
/// grammar into a single combined grammar (<paramref name="split"/> is false).
/// </summary>
/// <param name="pos">Not used by this method.</param>
/// <param name="document">The grammar document the operation starts from.</param>
/// <param name="split">True to split a combined grammar; false to combine a parser grammar with its lexer.</param>
/// <returns>
/// A map from file path to new file content. Each source file that the operation
/// supersedes is stored with a null value (presumably "delete this file" — confirm
/// against the caller). Returns null when the operation cannot be applied: the document
/// is not an ANTLR grammar of the required kind, the parse tree root is not a grammarSpec,
/// the directory of the file cannot be determined, or (when combining) exactly two
/// grammar files could not be resolved.
/// </returns>
public static Dictionary<string, string> SplitCombineGrammars(int pos, Document document, bool split)
{
    var result = new Dictionary<string, string>();

    // The factory result is cast with "as", so it may be null for non-ANTLR documents.
    AntlrGrammarDetails pd_parser = ParserDetailsFactory.Create(document) as AntlrGrammarDetails;
    if (pd_parser == null)
    {
        return null;
    }

    // Splitting only applies to combined grammars; combining only to parser grammars.
    ExtractGrammarType lp = new ExtractGrammarType();
    ParseTreeWalker.Default.Walk(lp, pd_parser.ParseTree);
    if (split && lp.Type != ExtractGrammarType.GrammarType.Combined)
    {
        return null;
    }
    if (!split && lp.Type != ExtractGrammarType.GrammarType.Parser)
    {
        return null;
    }

    Table table = new Table(pd_parser, document);
    table.ReadRules();
    table.FindPartitions();
    table.FindStartRules();

    string old_code = document.Code;
    const string parser_suffix = "Parser";

    if (split)
    {
        // Build the text of the parser grammar and the lexer grammar side by side.
        StringBuilder sb_parser = new StringBuilder();
        StringBuilder sb_lexer = new StringBuilder();

        var root = pd_parser.ParseTree as ANTLRv4Parser.GrammarSpecContext;
        if (root == null)
        {
            return null;
        }

        var grammar_type_tree = root.grammarType();
        var id = root.id();
        var semi_tree = root.SEMI();
        var rules_tree = root.rules();

        // Everything before the grammar declaration is copied to both outputs.
        string pre = old_code.Substring(0, pd_parser.TokStream.Get(grammar_type_tree.SourceInterval.a).StartIndex);
        sb_parser.Append(pre);
        sb_lexer.Append(pre);

        // Replace the "grammar X;" declaration with the two specialised declarations.
        sb_parser.Append("parser grammar " + id.GetText() + "Parser;" + Environment.NewLine);
        sb_lexer.Append("lexer grammar " + id.GetText() + "Lexer;" + Environment.NewLine);

        // Text between the declaration's semicolon and the first rule goes to both outputs.
        int x1 = pd_parser.TokStream.Get(semi_tree.SourceInterval.b).StopIndex + 1;
        int x2 = pd_parser.TokStream.Get(rules_tree.SourceInterval.a).StartIndex;
        string n1 = old_code.Substring(x1, x2 - x1);
        sb_parser.Append(n1);
        sb_lexer.Append(n1);

        // Route each rule's text to the parser or the lexer output.
        int end = 0;
        for (int i = 0; i < table.rules.Count; ++i)
        {
            var r = table.rules[i];
            string rule_text = old_code.Substring(r.start_index, r.end_index - r.start_index);
            if (r.is_parser_rule)
            {
                sb_parser.Append(rule_text);
            }
            else
            {
                sb_lexer.Append(rule_text);
            }
            // NOTE(review): the trailing text starts one character past end_index;
            // confirm this matches how Table computes end_index.
            end = r.end_index + 1;
        }

        // Whatever follows the last rule is copied to both outputs.
        if (end < old_code.Length)
        {
            string rest = old_code.Substring(end);
            sb_parser.Append(rest);
            sb_lexer.Append(rest);
        }

        string g4_file_path = document.FullPath;
        string current_dir = Path.GetDirectoryName(g4_file_path);
        if (current_dir == null)
        {
            return null;
        }

        string orig_name = Path.GetFileNameWithoutExtension(g4_file_path);
        // Path.Combine copes with an empty directory, where manual concatenation
        // would have produced a rooted path.
        string new_parser_ffn = Path.Combine(current_dir, orig_name + "Parser.g4");
        string new_lexer_ffn = Path.Combine(current_dir, orig_name + "Lexer.g4");

        result.Add(new_parser_ffn, sb_parser.ToString());
        result.Add(new_lexer_ffn, sb_lexer.ToString());
        result.Add(g4_file_path, null);
    }
    else
    {
        // Starting from the parser file, repeatedly add every key of
        // _dependent_grammars whose value collection contains an already collected
        // file, until no new file is found.
        HashSet<string> read_files = new HashSet<string>();
        read_files.Add(document.FullPath);
        for (;;)
        {
            int before_count = read_files.Count;
            foreach (var f in read_files)
            {
                var additional = AntlrGrammarDetails._dependent_grammars
                    .Where(t => t.Value.Contains(f))
                    .Select(t => t.Key)
                    .ToList();
                // Union(...).ToHashSet() builds a new set, so the set being
                // enumerated by this foreach is never mutated.
                read_files = read_files.Union(additional).ToHashSet();
            }
            if (read_files.Count == before_count)
            {
                break;
            }
        }

        // Resolve the collected files to grammar details, skipping anything that
        // is not an open ANTLR grammar document.
        List<AntlrGrammarDetails> lexers = new List<AntlrGrammarDetails>();
        foreach (string f in read_files)
        {
            Workspaces.Document lexer_document = Workspaces.Workspace.Instance.FindDocument(f);
            if (lexer_document == null)
            {
                continue;
            }
            AntlrGrammarDetails x = ParserDetailsFactory.Create(lexer_document) as AntlrGrammarDetails;
            if (x == null)
            {
                continue;
            }
            lexers.Add(x);
        }

        // Exactly two grammars are expected: the parser itself and its lexer.
        if (lexers.Count != 2)
        {
            return null;
        }

        // NOTE(review): assumes the parser document (inserted first) enumerates first,
        // making the second entry the lexer — relies on HashSet enumeration order.
        var pd_lexer = lexers[1];
        Workspaces.Document ldocument = Workspaces.Workspace.Instance.FindDocument(pd_lexer.FullFileName);
        if (ldocument == null)
        {
            return null;
        }

        Table lexer_table = new Table(pd_lexer, ldocument);
        lexer_table.ReadRules();
        lexer_table.FindPartitions();
        lexer_table.FindStartRules();

        // Build the text of the combined grammar.
        StringBuilder sb_parser = new StringBuilder();
        var root = pd_parser.ParseTree as ANTLRv4Parser.GrammarSpecContext;
        if (root == null)
        {
            return null;
        }

        var grammar_type_tree = root.grammarType();
        var id = root.id();
        var semi_tree = root.SEMI();
        var rules_tree = root.rules();

        string pre = old_code.Substring(0, pd_parser.TokStream.Get(grammar_type_tree.SourceInterval.a).StartIndex);
        sb_parser.Append(pre);

        // Strip only a trailing "Parser" (the suffix a split appends); occurrences
        // elsewhere in the name are part of the grammar's real name.
        string grammar_name = id.GetText();
        if (grammar_name.Length > parser_suffix.Length
            && grammar_name.EndsWith(parser_suffix, StringComparison.Ordinal))
        {
            grammar_name = grammar_name.Substring(0, grammar_name.Length - parser_suffix.Length);
        }
        sb_parser.Append("grammar " + grammar_name + ";" + Environment.NewLine);

        int x1 = pd_parser.TokStream.Get(semi_tree.SourceInterval.b).StopIndex + 1;
        int x2 = pd_parser.TokStream.Get(rules_tree.SourceInterval.a).StartIndex;
        string n1 = old_code.Substring(x1, x2 - x1);
        sb_parser.Append(n1);

        // Parser rules come from the parser grammar...
        int end = 0;
        for (int i = 0; i < table.rules.Count; ++i)
        {
            var r = table.rules[i];
            if (r.is_parser_rule)
            {
                sb_parser.Append(old_code.Substring(r.start_index, r.end_index - r.start_index));
            }
            end = r.end_index + 1;
        }
        if (end < old_code.Length)
        {
            sb_parser.Append(old_code.Substring(end));
        }

        // ...followed by the lexer rules from the lexer grammar.
        end = 0;
        var lexer_old_code = ldocument.Code;
        for (int i = 0; i < lexer_table.rules.Count; ++i)
        {
            var r = lexer_table.rules[i];
            if (!r.is_parser_rule)
            {
                sb_parser.Append(lexer_old_code.Substring(r.start_index, r.end_index - r.start_index));
            }
            end = r.end_index + 1;
        }
        if (end < lexer_old_code.Length)
        {
            sb_parser.Append(lexer_old_code.Substring(end));
        }

        string g4_file_path = document.FullPath;
        string current_dir = Path.GetDirectoryName(g4_file_path);
        if (current_dir == null)
        {
            return null;
        }

        // Derive the combined file name from the file stem, dropping only a trailing
        // "Parser". Working on the stem avoids producing "X.g4.g4" for files that
        // do not end in "Parser.g4".
        string new_name = Path.GetFileNameWithoutExtension(g4_file_path);
        if (new_name.Length > parser_suffix.Length
            && new_name.EndsWith(parser_suffix, StringComparison.Ordinal))
        {
            new_name = new_name.Substring(0, new_name.Length - parser_suffix.Length);
        }
        string new_parser_ffn = Path.Combine(current_dir, new_name + ".g4");

        result.Add(new_parser_ffn, sb_parser.ToString());
        // The combined file may share a path with a source file when no suffix was
        // stripped; in that case the new content must win over the null marker.
        if (!result.ContainsKey(pd_parser.FullFileName))
        {
            result.Add(pd_parser.FullFileName, null);
        }
        if (!result.ContainsKey(pd_lexer.FullFileName))
        {
            result.Add(pd_lexer.FullFileName, null);
        }
    }

    return result;
}
/// <summary>
/// Replaces string literals used outside lexer rules with the name of the lexer rule
/// that defines exactly that literal, across the given grammar and the grammars
/// related to it through <c>AntlrGrammarDetails._dependent_grammars</c>.
/// </summary>
/// <param name="index">Not used by this method.</param>
/// <param name="document">The grammar document the search starts from.</param>
/// <returns>
/// A map from file path to the rewritten file content, with one entry per file in which
/// at least one literal was replaced. The map is empty when the document is not a
/// parser, lexer, or combined grammar, or when nothing was replaced.
/// </returns>
public static Dictionary<string, string> ReplaceLiterals(int index, Document document)
{
    Dictionary<string, string> result = new Dictionary<string, string>();

    // The factory result is cast with "as", so it may be null for non-ANTLR documents.
    AntlrGrammarDetails pd_parser = ParserDetailsFactory.Create(document) as AntlrGrammarDetails;
    if (pd_parser == null)
    {
        return result;
    }

    // Only proceed when the starting file is some kind of grammar.
    ExtractGrammarType egt = new ExtractGrammarType();
    ParseTreeWalker.Default.Walk(egt, pd_parser.ParseTree);
    var is_grammar = egt.Type == ExtractGrammarType.GrammarType.Parser
        || egt.Type == ExtractGrammarType.GrammarType.Combined
        || egt.Type == ExtractGrammarType.GrammarType.Lexer;
    if (!is_grammar)
    {
        return result;
    }

    // Collect all related grammar files by following _dependent_grammars in both
    // directions (entries whose value contains a collected file, and the values of
    // entries keyed by a collected file) until the set stops growing.
    HashSet<string> read_files = new HashSet<string>();
    read_files.Add(document.FullPath);
    for (;;)
    {
        int before_count = read_files.Count;
        foreach (var f in read_files)
        {
            var additional = AntlrGrammarDetails._dependent_grammars
                .Where(t => t.Value.Contains(f))
                .Select(t => t.Key)
                .ToList();
            // Union(...).ToHashSet() builds a new set, so the set being enumerated
            // by this foreach is never mutated.
            read_files = read_files.Union(additional).ToHashSet();
        }
        foreach (var f in read_files)
        {
            var additional = AntlrGrammarDetails._dependent_grammars
                .Where(t => t.Key == f)
                .Select(t => t.Value);
            foreach (var dependencies in additional)
            {
                read_files = read_files.Union(dependencies).ToHashSet();
            }
        }
        if (read_files.Count == before_count)
        {
            break;
        }
    }

    // Gather the literals of every grammar and derive the substitutions,
    // i.e. string literal -> lexer rule name.
    Dictionary<Workspaces.Document, List<TerminalNodeImpl>> literals_by_document =
        new Dictionary<Workspaces.Document, List<TerminalNodeImpl>>();
    Dictionary<string, string> subs = new Dictionary<string, string>();
    foreach (string f in read_files)
    {
        Workspaces.Document whatever_document = Workspaces.Workspace.Instance.FindDocument(f);
        if (whatever_document == null)
        {
            continue;
        }
        AntlrGrammarDetails pd_whatever = ParserDetailsFactory.Create(whatever_document) as AntlrGrammarDetails;
        if (pd_whatever == null)
        {
            continue;
        }

        // Find literals in this grammar.
        LiteralsGrammar lp_whatever = new LiteralsGrammar(pd_whatever);
        ParseTreeWalker.Default.Walk(lp_whatever, pd_whatever.ParseTree);
        List<TerminalNodeImpl> list_literals = lp_whatever.Literals;
        literals_by_document[whatever_document] = list_literals;

        foreach (var lexer_literal in list_literals)
        {
            var old_name = lexer_literal.GetText();

            // A literal defines a token only when it is the sole content of a lexer
            // rule. Walk up from the literal and require this exact shape, with every
            // intermediate node having a single child:
            //
            //   lexerRuleSpec            (4 children: NAME ':' lexerRuleBlock ';')
            //     lexerRuleBlock
            //       lexerAltList
            //         lexerAlt
            //           lexerElements
            //             lexerElement
            //               lexerAtom
            //                 terminal
            //                   '%binary'    (the literal)
            var p1 = lexer_literal.Parent;
            if (p1.ChildCount != 1 || !(p1 is ANTLRv4Parser.TerminalContext))
            {
                continue;
            }
            var p2 = p1.Parent;
            if (p2.ChildCount != 1 || !(p2 is ANTLRv4Parser.LexerAtomContext))
            {
                continue;
            }
            var p3 = p2.Parent;
            if (p3.ChildCount != 1 || !(p3 is ANTLRv4Parser.LexerElementContext))
            {
                continue;
            }
            var p4 = p3.Parent;
            if (p4.ChildCount != 1 || !(p4 is ANTLRv4Parser.LexerElementsContext))
            {
                continue;
            }
            var p5 = p4.Parent;
            if (p5.ChildCount != 1 || !(p5 is ANTLRv4Parser.LexerAltContext))
            {
                continue;
            }
            var p6 = p5.Parent;
            if (p6.ChildCount != 1 || !(p6 is ANTLRv4Parser.LexerAltListContext))
            {
                continue;
            }
            var p7 = p6.Parent;
            if (p7.ChildCount != 1 || !(p7 is ANTLRv4Parser.LexerRuleBlockContext))
            {
                continue;
            }
            var p8 = p7.Parent;
            if (p8.ChildCount != 4 || !(p8 is ANTLRv4Parser.LexerRuleSpecContext))
            {
                continue;
            }

            // The first child of the rule spec is the rule name.
            var new_name = p8.GetChild(0).GetText();

            // Several rules (or files) may define the same literal; keep the first
            // definition instead of throwing on the duplicate key.
            if (!subs.ContainsKey(old_name))
            {
                subs.Add(old_name, new_name);
            }
        }
    }

    // Decide which literal occurrences get rewritten: those that have a
    // substitution and do not sit inside a lexer rule.
    Dictionary<TerminalNodeImpl, string> rewrites = new Dictionary<TerminalNodeImpl, string>();
    foreach (var pair in literals_by_document)
    {
        foreach (var l in pair.Value)
        {
            bool inside_lexer_rule = false;
            for (IRuleNode p = l.Parent; p != null; p = p.Parent)
            {
                if (p is ANTLRv4Parser.LexerRuleSpecContext)
                {
                    inside_lexer_rule = true;
                    break;
                }
            }
            if (inside_lexer_rule)
            {
                continue;
            }

            subs.TryGetValue(l.GetText(), out string re);
            if (re != null)
            {
                rewrites.Add(l, re);
            }
        }
    }

    // Apply the rewrites file by file. Documents that can no longer be found are skipped.
    var files = rewrites.Select(r => r.Key.Payload.TokenSource.SourceName).OrderBy(q => q).Distinct();
    var documents = files
        .Select(f => Workspaces.Workspace.Instance.FindDocument(f))
        .Where(d => d != null)
        .ToList();
    foreach (Document f in documents)
    {
        string fn = f.FullPath;
        // Process the changes in token order so the text can be rebuilt left to right.
        var per_file_changes = rewrites
            .Where(z => z.Key.Payload.TokenSource.SourceName == f.FullPath)
            .OrderBy(z => z.Key.Payload.TokenIndex)
            .ToList();

        StringBuilder sb = new StringBuilder();
        int previous = 0;
        string code = f.Code;
        foreach (var change in per_file_changes)
        {
            int index_start = change.Key.Payload.StartIndex;
            int len = change.Key.Payload.Text.Length;

            // Copy the untouched text before the literal, then the replacement.
            sb.Append(code.Substring(previous, index_start - previous));
            sb.Append(change.Value);
            previous = index_start + len;
        }
        sb.Append(code.Substring(previous));

        // Indexer assignment: two source names resolving to one document must not throw.
        result[fn] = sb.ToString();
    }

    return result;
}