/// <summary>
/// Entry point: benchmarks training and formatting of a single test document against
/// the corpus for a given language.
/// args[0] is the language option (e.g. "-java"; the leading '-' is stripped),
/// args[1] is the path of the file to test. Runs TRIALS timed train/format cycles,
/// discards the warm-up trials, and logs the medians.
/// </summary>
public static void Main(string[] args)
{
    string langname = args[0].Substring(1); // strip leading '-' option char
    string testFilename = args[1];

    // Look up the language descriptor by name.
    LangDescriptor language = null;
    for (int i = 0; i < languages.Length; i++) // FIX: C# arrays use .Length, not Java's .length
    {
        if (languages[i].name.Equals(langname))
        {
            language = languages[i];
            break;
        }
    }
    if (language == null)
    {
        Log.WriteLine("Language " + langname + " unknown");
        return;
    }

    // load all files up front
    DateTime load_start = System.DateTime.Now;
    IList<string> allFiles = Tool.getFilenames(language.corpusDir, language.fileRegex);
    IList<InputDocument> documents = Tool.load(allFiles, language);
    DateTime load_stop = System.DateTime.Now;
    // FIX: DateTime - DateTime yields a TimeSpan, not a DateTime, and the elapsed
    // milliseconds come from TotalMilliseconds (no /1000000 nanosecond math needed).
    TimeSpan load_time = load_stop - load_start;
    Log.Write("Loaded {0:D} files in {1:D}ms\n", documents.Count, (long)load_time.TotalMilliseconds);

    // Split the corpus into the test document and everything else.
    string path = System.IO.Path.GetFullPath(testFilename);
    IList<InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
    IList<InputDocument> excluded = BuffUtils.filter(documents, d => d.fileName.Equals(path));
    Debug.Assert(others.Count == documents.Count - 1);
    if (excluded.Count == 0)
    {
        Log.WriteLine("Doc not in corpus: " + path);
        return;
    }
    InputDocument testDoc = excluded[0];

    // Collect per-trial training and formatting times (milliseconds).
    List<int> training = new List<int>();
    List<int> formatting = new List<int>();
    for (int i = 1; i <= TRIALS; i++)
    {
        org.antlr.codebuff.misc.Pair<int, int> timing = test(language, others, testDoc);
        training.Add(timing.a);
        formatting.Add(timing.b);
    }

    // drop first five trials (warm-up); FIX: IList<T> has no Java subList() —
    // List<T>.GetRange(start, count) is the C# equivalent.
    training = training.GetRange(5, training.Count - 5);
    formatting = formatting.GetRange(5, formatting.Count - 5);
    Log.Write("median of [5:{0:D}] training {1:D}ms\n", TRIALS - 1, BuffUtils.median(training));
    Log.Write("median of [5:{0:D}] formatting {1:D}ms\n", TRIALS - 1, BuffUtils.median(formatting));
}
/// <summary>
/// For the (rule, alt) pair of this context, records which literal tokens occur
/// among its direct terminal children: a literal that appears more than once is
/// added to ruleToRepeatedTokensSet (for later filtering), and every ordered pair
/// of distinct literals — e.g. ('{', '}'), ('begin', 'end') — is added to
/// ruleToPairsBag. Non-literal tokens (IDENTIFIER etc.) are ignored.
/// </summary>
public void EnterEveryRule(ParserRuleContext ctx)
{
    string ruleName = ruleNames[ctx.RuleIndex];
    IList<TerminalNode> terminals = getDirectTerminalChildren(ctx);
    RuleAltKey key = new RuleAltKey(ruleName, ctx.getAltNumber());

    // Walk all ordered unique index pairs (left, right), left < right; no (a,a)
    // index pairs, though the same token TYPE may occur at both positions.
    for (int left = 0; left < terminals.Count; left++)
    {
        int leftType = terminals[left].Symbol.Type;
        // NOTE(KED): this does not work with grammars that declare these tokens as
        // lexer fragments. The ANTLRv4Lexer.g4 in this directory does not use a
        // fragment for COLON, but the g4 grammar in the antlr grammars-v4 examples
        // does — and fragments are not part of the vocabulary, so GetLiteralName
        // returns null for them and the pair is silently skipped.
        if (vocab.GetLiteralName(leftType) == null)
        {
            continue; // only literals like '{' and ':', not IDENTIFIER etc.
        }
        for (int right = left + 1; right < terminals.Count; right++)
        {
            int rightType = terminals[right].Symbol.Type;
            if (vocab.GetLiteralName(rightType) == null)
            {
                continue;
            }

            if (leftType == rightType)
            {
                // Same literal repeated within this alt; remember the token type.
                ISet<int> repeated = null;
                if (!ruleToRepeatedTokensSet.TryGetValue(key, out repeated))
                {
                    repeated = new HashSet<int>();
                    ruleToRepeatedTokensSet[key] = repeated;
                }
                repeated.Add(leftType);
            }
            else
            {
                // Distinct literals: record the ordered (left, right) type pair.
                ISet<org.antlr.codebuff.misc.Pair<int, int>> bag = null;
                if (!ruleToPairsBag.TryGetValue(key, out bag))
                {
                    bag = new HashSet<org.antlr.codebuff.misc.Pair<int, int>>();
                    ruleToPairsBag[key] = bag;
                }
                bag.Add(new org.antlr.codebuff.misc.Pair<int, int>(leftType, rightType));
            }
        }
    }
}
/// <summary>
/// Return map for the various tokens related to this list re list membership.
/// Tags the prefix/suffix tokens surrounding the list (e.g. '(' and ')'), the
/// separator tokens, the first element's start token, and each remaining element's
/// start token with an (isOversizeList, role) pair drawn from the Trainer LIST_*
/// constants.
/// </summary>
public static IDictionary<Token, org.antlr.codebuff.misc.Pair<bool, int>> getInfoAboutListTokens<T1>(ParserRuleContext ctx, CodeBuffTokenStream tokens, IDictionary<Token, TerminalNode> tokenToNodeMap, IList<T1> siblings, bool isOversizeList) where T1 : Antlr4.Runtime.ParserRuleContext
{
    IDictionary<Token, org.antlr.codebuff.misc.Pair<bool, int>> tokenToListInfo = new Dictionary<Token, org.antlr.codebuff.misc.Pair<bool, int>>();
    ParserRuleContext first = siblings[0] as ParserRuleContext;
    ParserRuleContext last = siblings[siblings.Count - 1] as ParserRuleContext;
    Token prefixToken = tokens.getPreviousRealToken(first.Start.TokenIndex); // e.g., '(' in an arg list or ':' in grammar def
    Token suffixToken = tokens.getNextRealToken(last.Stop.TokenIndex); // e.g., LT(1) is last token of list; LT(2) is ')' in an arg list or ';' in grammar def
    if (prefixToken != null && suffixToken != null)
    {
        // FIX: the Dictionary indexer throws KeyNotFoundException on a missing key
        // (unlike Java's Map.get, which the original null checks assumed); use
        // TryGetValue so an absent mapping yields null.
        TerminalNode prefixNode;
        tokenToNodeMap.TryGetValue(prefixToken, out prefixNode);
        TerminalNode suffixNode;
        tokenToNodeMap.TryGetValue(suffixToken, out suffixNode);
        // FIX: also require suffixNode != null before dereferencing suffixNode.Parent.
        bool hasSurroundingTokens = prefixNode != null && suffixNode != null && prefixNode.Parent == suffixNode.Parent;
        if (hasSurroundingTokens)
        {
            tokenToListInfo[prefixToken] = new org.antlr.codebuff.misc.Pair<bool, int>(isOversizeList, Trainer.LIST_PREFIX);
            tokenToListInfo[suffixToken] = new org.antlr.codebuff.misc.Pair<bool, int>(isOversizeList, Trainer.LIST_SUFFIX);
        }
        IList<Tree> separators = getSeparators(ctx, siblings);
        if (separators.Count > 0) // FIX: guard — a list may have no separators at all
        {
            // First separator gets its own role; the rest are plain separators.
            Tree firstSep = separators[0];
            tokenToListInfo[(Token)firstSep.Payload] = new org.antlr.codebuff.misc.Pair<bool, int>(isOversizeList, Trainer.LIST_FIRST_SEPARATOR);
            foreach (Tree s in separators.Skip(1)) // Skip(1) == old Where((e, i) => i > 0 && i < Count); the upper-bound test was always true
            {
                tokenToListInfo[(Token)s.Payload] = new org.antlr.codebuff.misc.Pair<bool, int>(isOversizeList, Trainer.LIST_SEPARATOR);
            }
        }
        // handle sibling members: first element is tagged specially, the rest are members
        tokenToListInfo[first.Start] = new org.antlr.codebuff.misc.Pair<bool, int>(isOversizeList, Trainer.LIST_FIRST_ELEMENT);
        foreach (T1 ss in siblings.Skip(1))
        {
            var s = ss as ParserRuleContext;
            tokenToListInfo[s.Start] = new org.antlr.codebuff.misc.Pair<bool, int>(isOversizeList, Trainer.LIST_MEMBER);
        }
    }
    return tokenToListInfo;
}