using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;

private TokenizeResult TokenizeCpp(string text, TextPosition pos, int length, bool allowDoxy)
{
    TokenizeResult result = new TokenizeResult();

    // Lex the raw C++ source and record how long it took
    Stopwatch timer = Stopwatch.StartNew();
    List<CppToken> cppTokens = new List<CppToken>();
    using (CppLexer cppLexer = new CppLexer(text, pos, length))
    {
        cppTokens.AddRange(cppLexer.Tokenize());
        result.AddErrors(cppLexer.LexErrors);
    }
    timer.Stop();
    result.Stats.CppDuration += timer.Elapsed;

    foreach (CppToken token in cppTokens)
    {
        if (allowDoxy && (token.Kind == CppTokenKind.MultiLineCommentDoc || token.Kind == CppTokenKind.SingleLineCommentDoc))
        {
            // Documentation comments are re-tokenized by the doxygen tokenizer;
            // its tokens, errors and timings are merged into the result
            result.AddToken(token);
            using (TokenizeResult doxyRes = TokenizeDoxy(text, token.Position, token.Length))
            {
                result.Stats.DoxyDuration += doxyRes.Stats.DoxyDuration;
                result.Stats.HtmlDuration += doxyRes.Stats.HtmlDuration;
                result.AddTokens(doxyRes.Tokens);
                result.AddErrors(doxyRes.Errors);
            }
        }
        else
        {
            result.AddToken(token);
        }
    }
    return result;
}
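// Hypothetical usage sketch (not part of the original source): feeding a whole
// file through TokenizeCpp and printing the accumulated timing stats. The
// TextPosition(0) constructor and the Stats property names are taken from the
// surrounding code; this caller itself is an assumption for illustration.
private void PrintTokenizeTimings(string source)
{
    // TokenizeResult is disposable (see the using-block around TokenizeDoxy above)
    using (TokenizeResult res = TokenizeCpp(source, new TextPosition(0), source.Length, allowDoxy: true))
    {
        Console.WriteLine($"C++:  {res.Stats.CppDuration.TotalMilliseconds} ms");
        Console.WriteLine($"Doxy: {res.Stats.DoxyDuration.TotalMilliseconds} ms");
        Console.WriteLine($"Html: {res.Stats.HtmlDuration.TotalMilliseconds} ms");
    }
}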
static int Main(string[] args)
{
    if (args.Length < 1)
    {
        Console.Error.WriteLine("Missing filepath argument!");
        return -1;
    }
    string filePath = args[0];
    string source = File.ReadAllText(filePath);

    TokenizerType[] tokenizerTypes = new[] { TokenizerType.Superpower, TokenizerType.DoxygenCpp, TokenizerType.CustomCpp };
    const int numberOfIterationsPerType = 1;

    Dictionary<TokenizerType, List<TimeSpan>> durations = new Dictionary<TokenizerType, List<TimeSpan>>();
    Dictionary<TokenizerType, List<CToken>> tokenMap = new Dictionary<TokenizerType, List<CToken>>();

    // Run every tokenizer over the same source and record its duration
    foreach (TokenizerType tokenizerType in tokenizerTypes)
    {
        List<TimeSpan> spans = new List<TimeSpan>();
        durations.Add(tokenizerType, spans);
        List<CToken> outTokens = new List<CToken>();
        tokenMap.Add(tokenizerType, outTokens);
        for (int iteration = 1; iteration <= numberOfIterationsPerType; ++iteration)
        {
            outTokens.Clear();
            Console.WriteLine($"{tokenizerType} tokenizer[{iteration}/{numberOfIterationsPerType}] start...");
            Stopwatch timer = Stopwatch.StartNew();
            switch (tokenizerType)
            {
                case TokenizerType.Superpower:
                {
                    var tokenizer = new CSuperPowerTokenizer();
                    var tokens = tokenizer.Tokenize(source);
                    foreach (var token in tokens)
                    {
                        if (token.Kind == CppTokenKind.Eof)
                            break;
                        // Superpower line/column are 1-based; convert to the 0-based TextPosition used here
                        var start = new TextPosition(token.Position.Absolute, token.Position.Line - 1, token.Position.Column - 1);
                        var end = new TextPosition(token.Position.Absolute + token.Span.Length, token.Span.Position.Line - 1, token.Span.Position.Column - 1);
                        var value = source.Substring(start.Index, end.Index - start.Index);
                        outTokens.Add(new CToken(token.Kind, start, end, value));
                    }
                }
                break;

                case TokenizerType.CustomCpp:
                {
                    using (var stream = new BasicTextStream(source, new TextPosition(0), source.Length))
                    {
                        // Pull tokens one at a time until the end of the stream
                        CToken token;
                        do
                        {
                            token = CTokenizer.GetToken(stream);
                            if (token.Kind == CppTokenKind.Eof)
                                break;
                            outTokens.Add(token);
                        } while (token.Kind != CppTokenKind.Eof);
                    }
                }
                break;

                case TokenizerType.DoxygenCpp:
                {
                    using (var lexer = new CppLexer(source, new TextPosition(0), source.Length))
                    {
                        var tokens = lexer.Tokenize();
                        foreach (var token in tokens)
                        {
                            if (token.Kind == CppTokenKind.Eof)
                                break;
                            var start = token.Position;
                            var end = new TextPosition(token.Position.Index + token.Length, token.Position.Line, token.Position.Column);
                            var value = source.Substring(start.Index, end.Index - start.Index);
                            outTokens.Add(new CToken(token.Kind, start, end, value));
                        }
                    }
                }
                break;

                default:
                    throw new Exception($"Unsupported tokenizer type -> {tokenizerType}");
            }
            timer.Stop();
            spans.Add(timer.Elapsed);
            Console.WriteLine($"{tokenizerType} tokenizer[{iteration}/{numberOfIterationsPerType}] done, got {outTokens.Count} tokens, took {timer.Elapsed.TotalMilliseconds} ms");
        }
    }

    // Report min/max/avg duration per tokenizer
    foreach (TokenizerType tokenizerType in tokenizerTypes)
    {
        List<TimeSpan> timeSpans = durations[tokenizerType];
        TimeSpan minTime = GetMinTime(timeSpans);
        TimeSpan maxTime = GetMaxTime(timeSpans);
        TimeSpan avgTime = GetAvgTime(timeSpans);
        Console.WriteLine($"{tokenizerType} tokenizer, min: {minTime}, max: {maxTime}, avg: {avgTime}, iterations: {numberOfIterationsPerType}");
    }

#if false
    // Compare tokens against each other
    foreach (TokenizerType tokenizerTypeA in tokenizerTypes)
    {
        List<CToken> tokensA = tokenMap[tokenizerTypeA];
        foreach (TokenizerType tokenizerTypeB in tokenizerTypes)
        {
            List<CToken> tokensB = tokenMap[tokenizerTypeB];
            if (tokenizerTypeA != tokenizerTypeB)
                CompareTokens(tokenizerTypeA, tokensA, tokenizerTypeB, tokensB);
        }
    }
#endif

    // Dump each tokenizer's tokens into a text file on the desktop
    string desktopPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
    foreach (TokenizerType tokenizerType in tokenizerTypes)
    {
        string filename = $"tokenizer_{tokenizerType}.txt";
        string singleFilePath = Path.Combine(desktopPath, filename);
        List<CToken> tokens = tokenMap[tokenizerType];
        using (StreamWriter writer = new StreamWriter(singleFilePath, false, Encoding.ASCII))
        {
            foreach (var token in tokens)
            {
                writer.Write(token);
                writer.Write("\n");
            }
        }
    }

    Console.WriteLine("Press any key to exit");
    Console.ReadKey();
    return 0;
}
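// Main references GetMinTime, GetMaxTime and GetAvgTime, which are not part of
// this listing. A minimal sketch of what they could look like, assuming they
// simply aggregate the recorded List<TimeSpan> values (an assumption, not the
// original implementations):
static TimeSpan GetMinTime(List<TimeSpan> spans)
{
    TimeSpan min = TimeSpan.MaxValue;
    foreach (TimeSpan span in spans)
        if (span < min) min = span;
    return min;
}

static TimeSpan GetMaxTime(List<TimeSpan> spans)
{
    TimeSpan max = TimeSpan.MinValue;
    foreach (TimeSpan span in spans)
        if (span > max) max = span;
    return max;
}

static TimeSpan GetAvgTime(List<TimeSpan> spans)
{
    if (spans.Count == 0)
        return TimeSpan.Zero;
    long totalTicks = 0;
    foreach (TimeSpan span in spans)
        totalTicks += span.Ticks;
    return new TimeSpan(totalTicks / spans.Count);
}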