/// <summary>
/// Entry point: benchmarks several C++ tokenizer implementations (Superpower,
/// Doxygen C++ lexer, custom C++ tokenizer) against one source file given as the
/// first command line argument, prints per-tokenizer timing statistics, and dumps
/// each tokenizer's token stream to a text file on the user's desktop.
/// </summary>
/// <param name="args">args[0] is the path of the source file to tokenize.</param>
/// <returns>0 on success, -1 when the file path argument is missing.</returns>
static int Main(string[] args)
{
    if (args.Length < 1)
    {
        Console.Error.WriteLine("Missing filepath argument!");
        return -1;
    }
    string filePath = args[0];
    string source = File.ReadAllText(filePath);

    TokenizerType[] tokenizerTypes = new[] { TokenizerType.Superpower, TokenizerType.DoxygenCpp, TokenizerType.CustomCpp };
    const int numberOfIterationsPerType = 1;

    Dictionary<TokenizerType, List<TimeSpan>> durations = new Dictionary<TokenizerType, List<TimeSpan>>();
    Dictionary<TokenizerType, List<CToken>> tokenMap = new Dictionary<TokenizerType, List<CToken>>();

    foreach (TokenizerType tokenizerType in tokenizerTypes)
    {
        List<TimeSpan> spans = new List<TimeSpan>();
        durations.Add(tokenizerType, spans);
        List<CToken> outTokens = new List<CToken>();
        tokenMap.Add(tokenizerType, outTokens);
        for (int iteration = 1; iteration <= numberOfIterationsPerType; ++iteration)
        {
            // Only the token list of the LAST iteration is kept for the dump step below.
            outTokens.Clear();
            Console.WriteLine($"{tokenizerType} tokenizer[{iteration}/{numberOfIterationsPerType}] start...");
            Stopwatch timer = Stopwatch.StartNew();
            switch (tokenizerType)
            {
                case TokenizerType.Superpower:
                {
                    var tokenizer = new CSuperPowerTokenizer();
                    var tokens = tokenizer.Tokenize(source);
                    foreach (var token in tokens)
                    {
                        if (token.Kind == CppTokenKind.Eof)
                        {
                            break;
                        }
                        // Superpower line/column positions are 1-based; ours are 0-based.
                        var start = new TextPosition(token.Position.Absolute, token.Position.Line - 1, token.Position.Column - 1);
                        // NOTE(review): the end position takes line/column from token.Span.Position,
                        // which is the span *start* — only the absolute index reflects the real end.
                        // Confirm this is intentional before relying on the end line/column.
                        var end = new TextPosition(token.Position.Absolute + token.Span.Length, token.Span.Position.Line - 1, token.Span.Position.Column - 1);
                        var value = source.Substring(start.Index, end.Index - start.Index);
                        outTokens.Add(new CToken(token.Kind, start, end, value));
                    }
                }
                break;

                case TokenizerType.CustomCpp:
                {
                    using (var stream = new BasicTextStream(source, new TextPosition(0), source.Length))
                    {
                        // Pull tokens until EOF. (The original do/while also re-tested the
                        // kind in the loop condition, which was unreachable after the break.)
                        while (true)
                        {
                            CToken token = CTokenizer.GetToken(stream);
                            if (token.Kind == CppTokenKind.Eof)
                            {
                                break;
                            }
                            outTokens.Add(token);
                        }
                    }
                }
                break;

                case TokenizerType.DoxygenCpp:
                {
                    using (var lexer = new CppLexer(source, new TextPosition(0), source.Length))
                    {
                        var tokens = lexer.Tokenize();
                        foreach (var token in tokens)
                        {
                            if (token.Kind == CppTokenKind.Eof)
                            {
                                break;
                            }
                            var start = token.Position;
                            // NOTE(review): end line/column are copied from the start position;
                            // only the index accounts for the token length.
                            var end = new TextPosition(token.Position.Index + token.Length, token.Position.Line, token.Position.Column);
                            var value = source.Substring(start.Index, end.Index - start.Index);
                            outTokens.Add(new CToken(token.Kind, start, end, value));
                        }
                    }
                }
                break;

                default:
                    throw new Exception($"Unsupported tokenizer type -> {tokenizerType}");
            }
            timer.Stop();
            spans.Add(timer.Elapsed);
            // Use the List<T>.Count property instead of the LINQ Count() extension.
            Console.WriteLine($"{tokenizerType} tokenizer[{iteration}/{numberOfIterationsPerType}] done, got {outTokens.Count} tokens, took {timer.Elapsed.TotalMilliseconds} ms");
        }
    }

    // Print min/max/avg timing per tokenizer.
    foreach (TokenizerType tokenizerType in tokenizerTypes)
    {
        List<TimeSpan> timeSpans = durations[tokenizerType];
        TimeSpan minTime = GetMinTime(timeSpans);
        TimeSpan maxTime = GetMaxTime(timeSpans);
        TimeSpan avgTime = GetAvgTime(timeSpans);
        Console.WriteLine($"{tokenizerType} tokenizer, min: {minTime}, max: {maxTime}, avg: {avgTime}, iterations: {numberOfIterationsPerType}");
    }

#if false
    // Compare tokens against each other
    foreach (TokenizerType tokenizerTypeA in tokenizerTypes)
    {
        List<CToken> tokensA = tokenMap[tokenizerTypeA];
        foreach (TokenizerType tokenizerTypeB in tokenizerTypes)
        {
            List<CToken> tokensB = tokenMap[tokenizerTypeB];
            if (tokenizerTypeA != tokenizerTypeB)
            {
                CompareTokens(tokenizerTypeA, tokensA, tokenizerTypeB, tokensB);
            }
        }
    }
#endif

    // Dump each tokenizer's token stream to tokenizer_<type>.txt on the desktop.
    string desktopPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
    foreach (TokenizerType tokenizerType in tokenizerTypes)
    {
        string filename = $"tokenizer_{tokenizerType}.txt";
        string singleFilePath = Path.Combine(desktopPath, filename);
        List<CToken> tokens = tokenMap[tokenizerType];
        // NOTE(review): ASCII encoding silently replaces non-ASCII characters in
        // token values — consider UTF-8 if the input source may contain them.
        using (StreamWriter writer = new StreamWriter(singleFilePath, false, Encoding.ASCII))
        {
            foreach (var token in tokens)
            {
                writer.Write(token);
                writer.Write("\n");
            }
        }
    }

    Console.WriteLine("Press any key to exit");
    Console.ReadKey();
    return 0;
}
/// <summary>
/// Parses one doxygen command token (and its expected arguments) from the stream,
/// maintaining the entity stack (Push/Pop), registering section/page/reference
/// symbols in the symbol cache, and finally parsing the command's block content.
/// </summary>
/// <param name="stream">Token stream positioned on a command token; advanced at least once.</param>
/// <param name="contentRoot">Root content node. NOTE(review): currently unused in this method — confirm.</param>
/// <returns>
/// True in the normal case. NOTE(review): despite the original note claiming this
/// always returns true, the block-mismatch error paths below return false.
/// </returns>
private bool ParseCommand(LinkedListStream<IBaseToken> stream, IBaseNode contentRoot)
{
    // @NOTE(final): This must always return true, due to the fact that the stream is advanced at least once
    DoxygenToken commandToken = stream.Peek<DoxygenToken>();
    Debug.Assert(commandToken != null && commandToken.Kind == DoxygenTokenKind.Command);
    string commandName = commandToken.Value.Substring(1); // Strip the leading command prefix character
    stream.Next();

    string typeName = "Command";
    var rule = DoxygenSyntax.GetCommandRule(commandName);
    if (rule != null)
    {
        if (rule.Kind == DoxygenSyntax.CommandKind.EndCommandBlock)
        {
            // An end-block command must match an open block command on the stack.
            var t = Top;
            if (t == null)
            {
                AddError(commandToken.Position, $"Unterminated starting command block in command '{commandName}'", typeName, commandName);
                return false;
            }
            if (t.Entity.Kind != DoxygenEntityKind.BlockCommand)
            {
                AddError(commandToken.Position, $"Expect starting command block, but found '{t.Entity.Kind}' in command '{commandName}'", typeName, commandName);
                return false;
            }
            Pop();
        }

        // Paragraph or section command starts or command block starts -> Close previous paragraph or sectioning command
        if (rule.Kind == DoxygenSyntax.CommandKind.Paragraph || rule.Kind == DoxygenSyntax.CommandKind.Section || rule.Kind == DoxygenSyntax.CommandKind.StartCommandBlock)
        {
            var t = Top;
            if (t != null)
            {
                if (t.Entity.Kind == DoxygenEntityKind.Paragraph || t.Entity.Kind == DoxygenEntityKind.Section || t.Entity.Kind == DoxygenEntityKind.SubSection || t.Entity.Kind == DoxygenEntityKind.SubSubSection)
                {
                    Pop();
                }
            }
        }

        // Create the entity/node for commands that map to a doxygen entity.
        DoxygenEntity commandEntity = null;
        IEntityBaseNode<DoxygenEntity> commandNode = null;
        if (rule.EntityKind != DoxygenEntityKind.None)
        {
            commandEntity = new DoxygenEntity(rule.EntityKind, commandToken.Range);
            commandEntity.Id = commandName;
            commandNode = new DoxygenNode(Top, commandEntity);
            if (rule.IsPush)
            {
                Push(commandNode);
            }
            else
            {
                Add(commandNode);
            }
        }

        // Consume the argument tokens the rule expects; stop at the first mismatch.
        foreach (var arg in rule.Args)
        {
            DoxygenToken argToken = stream.Peek<DoxygenToken>();
            if (argToken == null)
            {
                break;
            }
            // Single dictionary lookup instead of ContainsKey + indexer.
            if (!DoxygenSyntax.ArgumentToTokenKindMap.TryGetValue(arg.Kind, out DoxygenTokenKind expectedTokenKind))
            {
                expectedTokenKind = DoxygenTokenKind.Invalid;
            }
            if (expectedTokenKind == DoxygenTokenKind.Invalid)
            {
                break;
            }
            if (expectedTokenKind != argToken.Kind)
            {
                AddError(argToken.Position, $"Expect argument token '{expectedTokenKind}', but got '{argToken.Kind}'", typeName, commandName);
                break;
            }
            if (commandNode != null)
            {
                commandNode.Entity.AddParameter(argToken, arg.Name, argToken.Value);
            }
            stream.Next();
        }

        if (commandEntity != null)
        {
            // Get name and text parameter (Some commands, have different names and text parameters, so there is a variable list of strings)
            var nameParam = commandEntity.FindParameterByName("name", "id");
            // NOTE(review): textParam is never read below — confirm whether it is dead code.
            var textParam = commandEntity.FindParameterByName("text", "title", "caption");
            if (nameParam == null || string.IsNullOrWhiteSpace(nameParam.Value))
            {
                if (rule.Kind == DoxygenSyntax.CommandKind.Section)
                {
                    // "mainpage" is the only section command allowed to omit its identifier.
                    if (!"mainpage".Equals(commandName))
                    {
                        AddError(commandToken.Position, $"Missing identifier mapping for command '{commandName}'", typeName, commandName);
                    }
                }
            }
            if (nameParam != null && !string.IsNullOrWhiteSpace(nameParam.Value))
            {
                string symbolName = nameParam.Value;
                Debug.Assert(commandNode != null);
                if (rule.Kind == DoxygenSyntax.CommandKind.Section)
                {
                    // Sections become source symbols; page/mainpage get the page kind.
                    SourceSymbolKind kind = SourceSymbolKind.DoxygenSection;
                    if ("page".Equals(commandName) || "mainpage".Equals(commandName))
                    {
                        kind = SourceSymbolKind.DoxygenPage;
                    }
                    SymbolCache.AddSource(Tag, symbolName, new SourceSymbol(kind, nameParam.Token.Range, commandNode));
                }
                else if ("ref".Equals(commandName) || "refitem".Equals(commandName))
                {
                    // A @ref value may contain several identifiers, function signatures
                    // ("name(args)") and member separators ('#', '.', '::') — scan them
                    // with a small text stream and register one reference symbol each.
                    string referenceValue = nameParam.Value;
                    TextPosition startPos = new TextPosition(0, nameParam.Token.Position.Line, nameParam.Token.Position.Column);
                    using (TextStream referenceTextStream = new BasicTextStream(referenceValue, startPos, referenceValue.Length))
                    {
                        ReferenceSymbolKind referenceTarget = ReferenceSymbolKind.Any;
                        while (!referenceTextStream.IsEOF)
                        {
                            char first = referenceTextStream.Peek();
                            char second = referenceTextStream.Peek(1);
                            if (SyntaxUtils.IsIdentStart(first))
                            {
                                // Lex one identifier.
                                referenceTextStream.StartLexeme();
                                while (!referenceTextStream.IsEOF)
                                {
                                    if (!SyntaxUtils.IsIdentPart(referenceTextStream.Peek()))
                                    {
                                        break;
                                    }
                                    referenceTextStream.AdvanceColumn();
                                }
                                var refRange = referenceTextStream.LexemeRange;
                                string singleReference = referenceTextStream.GetSourceText(refRange.Index, refRange.Length);
                                if (referenceTextStream.Peek() == '(')
                                {
                                    // Function reference: skip up to (not past) the closing parenthesis.
                                    referenceTarget = ReferenceSymbolKind.CppFunction;
                                    referenceTextStream.AdvanceColumn();
                                    while (!referenceTextStream.IsEOF)
                                    {
                                        if (referenceTextStream.Peek() == ')')
                                        {
                                            break;
                                        }
                                        referenceTextStream.AdvanceColumn();
                                    }
                                }
                                // Map the lexeme range back into the original token's coordinates.
                                var symbolRange = new TextRange(new TextPosition(nameParam.Token.Position.Index + refRange.Position.Index, refRange.Position.Line, refRange.Position.Column), refRange.Length);
                                SymbolCache.AddReference(Tag, singleReference, new ReferenceSymbol(referenceTarget, symbolRange, commandNode));
                            }
                            else if (first == '#' || first == '.')
                            {
                                referenceTarget = ReferenceSymbolKind.CppMember;
                                referenceTextStream.AdvanceColumn();
                            }
                            else if (first == ':' || second == ':')
                            {
                                // NOTE(review): advances two columns even when only a single ':'
                                // matched — confirm this is the intended handling of "::".
                                referenceTarget = ReferenceSymbolKind.CppMember;
                                referenceTextStream.AdvanceColumns(2);
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }
                else if ("subpage".Equals(commandName))
                {
                    SymbolCache.AddReference(Tag, symbolName, new ReferenceSymbol(ReferenceSymbolKind.DoxygenPage, nameParam.Token.Range, commandNode));
                }
            }
        }
        ParseBlockContent(stream, commandNode);
    }
    else
    {
        AddError(commandToken.Position, $"No parse rule for command '{commandName}' found", "Command", commandName);
    }
    return true;
}