Example #1
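        // Tokenizes a slice of C++ source; documentation comments are optionally
        // re-tokenized with the Doxygen tokenizer and their results merged in.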
        private TokenizeResult TokenizeCpp(string text, TextPosition pos, int length, bool allowDoxy)
        {
            TokenizeResult result = new TokenizeResult();
            Stopwatch      timer  = Stopwatch.StartNew();

            List <CppToken> cppTokens = new List <CppToken>();

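            // Run the C++ lexer over the requested range and collect any lexer errors.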
            using (CppLexer cppLexer = new CppLexer(text, pos, length))
            {
                cppTokens.AddRange(cppLexer.Tokenize());
                result.AddErrors(cppLexer.LexErrors);
            }
            timer.Stop();
            result.Stats.CppDuration += timer.Elapsed;
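            // Doc-comment tokens are kept and additionally re-tokenized with the
            // Doxygen tokenizer; their timings, tokens and errors are merged in.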
            foreach (CppToken token in cppTokens)
            {
                if (allowDoxy && (token.Kind == CppTokenKind.MultiLineCommentDoc || token.Kind == CppTokenKind.SingleLineCommentDoc))
                {
                    result.AddToken(token);
                    using (TokenizeResult doxyRes = TokenizeDoxy(text, token.Position, token.Length))
                    {
                        result.Stats.DoxyDuration += doxyRes.Stats.DoxyDuration;
                        result.Stats.HtmlDuration += doxyRes.Stats.HtmlDuration;
                        result.AddTokens(doxyRes.Tokens);
                        result.AddErrors(doxyRes.Errors);
                    }
                }
                else
                {
                    result.AddToken(token);
                }
            }
            return result;
        }
Example #2
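        // Benchmarks three tokenizer implementations against the same source file,
        // prints min/max/avg timings, and writes each token stream to disk.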
        static int Main(string[] args)
        {
            if (args.Length < 1)
            {
                Console.Error.WriteLine("Missing filepath argument!");
                return -1;
            }

            string filePath = args[0];
            string source   = File.ReadAllText(filePath);

            TokenizerType[] tokenizerTypes = new[] { TokenizerType.Superpower, TokenizerType.DoxygenCpp, TokenizerType.CustomCpp };

            const int numberOfIterationsPerType = 1;
            Dictionary <TokenizerType, List <TimeSpan> > durations = new Dictionary <TokenizerType, List <TimeSpan> >();
            Dictionary <TokenizerType, List <CToken> >   tokenMap  = new Dictionary <TokenizerType, List <CToken> >();

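            // Run every tokenizer for the configured number of iterations,
            // recording per-iteration durations and the produced tokens.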
            foreach (TokenizerType tokenizerType in tokenizerTypes)
            {
                List <TimeSpan> spans = new List <TimeSpan>();
                durations.Add(tokenizerType, spans);
                List <CToken> outTokens = new List <CToken>();
                tokenMap.Add(tokenizerType, outTokens);
                for (int iteration = 1; iteration <= numberOfIterationsPerType; ++iteration)
                {
                    outTokens.Clear();
                    Console.WriteLine($"{tokenizerType} tokenizer[{iteration}/{numberOfIterationsPerType}] start...");
                    Stopwatch timer = Stopwatch.StartNew();
                    switch (tokenizerType)
                    {
                    case TokenizerType.Superpower:
                    {
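                        // Superpower reports 1-based line/column values; convert
                        // them to the 0-based TextPosition used by CToken.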
                        var tokenizer = new CSuperPowerTokenizer();
                        var tokens    = tokenizer.Tokenize(source);
                        foreach (var token in tokens)
                        {
                            if (token.Kind == CppTokenKind.Eof)
                            {
                                break;
                            }
                            var start = new TextPosition(token.Position.Absolute, token.Position.Line - 1, token.Position.Column - 1);
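                            // Only the end offset is exact here; the end line/column
                            // simply reuse the span's start position.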
                            var end   = new TextPosition(token.Position.Absolute + token.Span.Length, token.Span.Position.Line - 1, token.Span.Position.Column - 1);
                            var value = source.Substring(start.Index, end.Index - start.Index);
                            outTokens.Add(new CToken(token.Kind, start, end, value));
                        }
                    }
                    break;

                    case TokenizerType.CustomCpp:
                    {
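                        // Hand-written tokenizer: pull tokens from a plain text
                        // stream until the end-of-file token is reached.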
                        using (var stream = new BasicTextStream(source, new TextPosition(0), source.Length))
                        {
                            CToken token;
                            while ((token = CTokenizer.GetToken(stream)).Kind != CppTokenKind.Eof)
                            {
                                outTokens.Add(token);
                            }
                        }
                    }
                    break;

                    case TokenizerType.DoxygenCpp:
                    {
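                        // Doxygen's C++ lexer: tokens carry a start position and a
                        // length, so the end offset is start + length.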
                        using (var lexer = new CppLexer(source, new TextPosition(0), source.Length))
                        {
                            var tokens = lexer.Tokenize();
                            foreach (var token in tokens)
                            {
                                if (token.Kind == CppTokenKind.Eof)
                                {
                                    break;
                                }
                                var start = token.Position;
                                var end   = new TextPosition(token.Position.Index + token.Length, token.Position.Line, token.Position.Column);
                                var value = source.Substring(start.Index, end.Index - start.Index);
                                outTokens.Add(new CToken(token.Kind, start, end, value));
                            }
                        }
                    }
                    break;

                    default:
                        throw new Exception($"Unsupported tokenizer type -> {tokenizerType}");
                    }
                    timer.Stop();
                    spans.Add(timer.Elapsed);
                    Console.WriteLine($"{tokenizerType} tokenizer[{iteration}/{numberOfIterationsPerType}] done, got {outTokens.Count()} tokens, took {timer.Elapsed.TotalMilliseconds} ms");
                }
            }

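            // Summarize the recorded durations per tokenizer.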
            foreach (TokenizerType tokenizerType in tokenizerTypes)
            {
                List <TimeSpan> timeSpans = durations[tokenizerType];
                TimeSpan        minTime   = GetMinTime(timeSpans);
                TimeSpan        maxTime   = GetMaxTime(timeSpans);
                TimeSpan        avgTime   = GetAvgTime(timeSpans);
                Console.WriteLine($"{tokenizerType} tokenizer, min: {minTime}, max: {maxTime}, avg: {avgTime}, iterations: {numberOfIterationsPerType}");
            }

#if false
            // Compare tokens against each other
            foreach (TokenizerType tokenizerTypeA in tokenizerTypes)
            {
                List <CToken> tokensA = tokenMap[tokenizerTypeA];
                foreach (TokenizerType tokenizerTypeB in tokenizerTypes)
                {
                    List <CToken> tokensB = tokenMap[tokenizerTypeB];
                    if (tokenizerTypeA != tokenizerTypeB)
                    {
                        CompareTokens(tokenizerTypeA, tokensA, tokenizerTypeB, tokensB);
                    }
                }
            }
#endif

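            // Write each tokenizer's token stream to its own file on the desktop
            // so the outputs can be compared manually.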
            string desktopPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            foreach (TokenizerType tokenizerType in tokenizerTypes)
            {
                string        filename       = $"tokenizer_{tokenizerType}.txt";
                string        singleFilePath = Path.Combine(desktopPath, filename);
                List <CToken> tokens         = tokenMap[tokenizerType];
                using (StreamWriter writer = new StreamWriter(singleFilePath, false, Encoding.ASCII))
                {
                    foreach (var token in tokens)
                    {
                        writer.Write(token);
                        writer.Write("\n");
                    }
                }
            }


            Console.WriteLine("Press any key to exit");
            Console.ReadKey();
            return 0;
        }
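
Main above calls GetMinTime, GetMaxTime and GetAvgTime, which the example does not include. A minimal sketch of what they presumably compute, assuming plain LINQ over the recorded TimeSpans; the helper bodies are an assumption, only the names and call sites come from the code above:

        // Hypothetical implementations of the timing helpers called in Main above.
        // They are not part of the original example; only the names and call sites
        // are taken from it. Requires using System.Linq;.
        static TimeSpan GetMinTime(List <TimeSpan> spans)
        {
            return spans.Min();
        }

        static TimeSpan GetMaxTime(List <TimeSpan> spans)
        {
            return spans.Max();
        }

        static TimeSpan GetAvgTime(List <TimeSpan> spans)
        {
            // Average over ticks to stay within TimeSpan precision.
            return TimeSpan.FromTicks((long)spans.Average(span => span.Ticks));
        }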