/// <summary> /// Remove all copy directives tokens. /// </summary> private void EraseCompilerDirectiveTokens() { foreach (TypeCobol.Compiler.Scanner.ITokensLine line in this.CompilationResults.TokensLines) { Compiler.Parser.CodeElementsLine codeElemnLine = line as Compiler.Parser.CodeElementsLine; if (codeElemnLine?.TokensWithCompilerDirectives != null) { foreach (var token in codeElemnLine.TokensWithCompilerDirectives) { if (token is CompilerDirectiveToken) { CompilerDirectiveToken compDirToken = token as CompilerDirectiveToken; if (compDirToken.OriginalTokens != null) { EraseCompilerDirectiveToken(compDirToken.OriginalTokens); } } } } } }
internal TokensGroup InsertCompilerDirectiveTokenOnFirstLine(IList <Token> tokensOnFirstLineBeforeCompilerDirective, CompilerDirective compilerDirective, bool hasError, IList <Token> compilerDirectiveTokensOnFirstLine, IList <Token> tokensOnFirstLineAfterCompilerDirective, bool hasDirectiveTokenContinuedOnNextLine) { // Register compiler listing control directive if (compilerDirective.Type == CompilerDirectiveType.ASTERISK_CBL || compilerDirective.Type == CompilerDirectiveType.ASTERISK_CONTROL || compilerDirective.Type == CompilerDirectiveType.EJECT || compilerDirective.Type == CompilerDirectiveType.SKIP1 || compilerDirective.Type == CompilerDirectiveType.SKIP2 || compilerDirective.Type == CompilerDirectiveType.SKIP3 || compilerDirective.Type == CompilerDirectiveType.TITLE) { CompilerListingControlDirective = compilerDirective; } // Register COPY directives // Prepare dictionary for COPY imported documents if (compilerDirective.Type == CompilerDirectiveType.COPY || compilerDirective.Type == CompilerDirectiveType.EXEC_SQL_INCLUDE) { if (ImportedDocuments == null) { ImportedDocuments = new Dictionary <CopyDirective, ImportedTokensDocument>(1); } ImportedDocuments.Add((CopyDirective)compilerDirective, null); } // Register REPLACE compiler directive if (compilerDirective.Type == CompilerDirectiveType.REPLACE) { ReplaceDirective = (ReplaceDirective)compilerDirective; } // Initialize tokensWithCompilerDirectives // (first compiler directive found on this line) if (!HasCompilerDirectives) { // Initialize tokens list tokensWithCompilerDirectives = new List <Token>(); // Keep tokens before compiler directive if (tokensOnFirstLineBeforeCompilerDirective != null) { foreach (Token token in tokensOnFirstLineBeforeCompilerDirective) { tokensWithCompilerDirectives.Add(token); } } } // Update tokensWithCompilerDirectives // (several compiler directives on the same line) else { // Reset tokens list IList <Token> previousTokens = tokensWithCompilerDirectives; tokensWithCompilerDirectives = new List <Token>(); // Keep tokens before compiler directive Token firstTokenOfCompilerDirective = compilerDirectiveTokensOnFirstLine[0]; foreach (Token token in previousTokens) { if (token == firstTokenOfCompilerDirective) { break; } tokensWithCompilerDirectives.Add(token); } } // Build a CompilerDirectiveToken wrapping all matched tokens on the first line CompilerDirectiveToken directiveToken = new CompilerDirectiveToken( compilerDirective, compilerDirectiveTokensOnFirstLine, hasError); // Add the compilerDirectiveToken tokensWithCompilerDirectives.Add(directiveToken); // Keep tokens after compiler directive if (tokensOnFirstLineAfterCompilerDirective != null) { foreach (Token token in tokensOnFirstLineAfterCompilerDirective) { tokensWithCompilerDirectives.Add(token); } } // Register line continuation properties HasDirectiveTokenContinuedOnNextLine = HasDirectiveTokenContinuedOnNextLine || hasDirectiveTokenContinuedOnNextLine; // Return potentially continued compiler directive token if (tokensOnFirstLineAfterCompilerDirective == null) { return(directiveToken); } else { return(null); } }
public static void GenerateStatisticsForPrograms(CompilationProject project, IEnumerable <string> textNames, TextWriter console, string countersFile, string languageModelForProgramFile, string languageModelForCopyFile) { // Initialize statistics vars // 1. Program analysis after preprocessing // - total number of lines per program (including expanded COPY directives) long[] linesCountDistributionCategories = { 500, 1000, 1500, 2000, 3000, 5000, 7500, 10000, 15000, 20000, 30000, 50000, int.MaxValue }; StatsCounter <CobolTextLineType> linesCounter = new StatsCounter <CobolTextLineType>(linesCountDistributionCategories); // - total number of tokens per program (after preprocessing) long[] tokensCountDistributionCategories = { 1500, 3000, 4500, 6000, 9000, 15000, 22500, 30000, 45000, 60000, 90000, 150000, int.MaxValue }; StatsCounter <TokenType> tokensCounter = new StatsCounter <TokenType>(tokensCountDistributionCategories); // - number of copies per program long[] copiesCountDistributionCategories = { 0, 5, 10, 15, 20, 30, 50, int.MaxValue }; StatsCounter <CopyDirectiveType> copiesCounter = new StatsCounter <CopyDirectiveType>(copiesCountDistributionCategories); // - number of replaced tokens per program long[] replacedTokensCountDistributionCategories = { 50, 100, 150, 200, 300, 500, 1000, 2000, 5000, 10000, 20000, int.MaxValue }; StatsCounter <TokenType> replacedTokensCounter = new StatsCounter <TokenType>(tokensCountDistributionCategories); // - number of code elements per program long[] codeElementsCountDistributionCategories = { 100, 200, 300, 400, 500, 750, 1000, 1500, 2000, 5000, 10000, int.MaxValue }; StatsCounter <CodeElementType> codeElementCounter = new StatsCounter <CodeElementType>(codeElementsCountDistributionCategories); // 2. Program files before preprocessing // - number of lines per program file (before preprocessing) long[] linesCountPerProgramFileDistributionCategories = { 25, 50, 100, 150, 200, 300, 500, 1000, 1500, 2000, 3000, int.MaxValue }; StatsCounter <CobolTextLineType> linesPerProgramFileCounter = new StatsCounter <CobolTextLineType>(linesCountPerProgramFileDistributionCategories); // - number of tokens per program file (before preprocessing) long[] tokensCountPerProgramFileDistributionCategories = { 1500, 3000, 4500, 6000, 9000, 15000, 22500, 30000, 45000, 60000, 90000, 150000, int.MaxValue }; StatsCounter <TokenType> tokensPerProgramFileCounter = new StatsCounter <TokenType>(tokensCountPerProgramFileDistributionCategories); // - number of compiler directives per program file long[] compilerDirectivesPerProgramFileCountDistributionCategories = { 0, 5, 10, 15, 20, 30, 50, 75, 100, 200, int.MaxValue }; StatsCounter <CompilerDirectiveType> compilerDirectivesPerProgramFileCounter = new StatsCounter <CompilerDirectiveType>(compilerDirectivesPerProgramFileCountDistributionCategories); // 3. Copy files before preprocessing // - number of references to each copy file IDictionary <string, int> copyFileReferenceCount = new Dictionary <string, int>(); // - number of lines per copy file (before preprocessing) long[] linesCountPerCopyFileDistributionCategories = { 25, 50, 100, 150, 200, 300, 500, 1000, 1500, 2000, 3000, int.MaxValue }; StatsCounter <CobolTextLineType> linesPerCopyFileCounter = new StatsCounter <CobolTextLineType>(linesCountPerCopyFileDistributionCategories); // - number of tokens per copy file (before preprocessing) long[] tokensCountPerCopyFileDistributionCategories = { 1500, 3000, 4500, 6000, 9000, 15000, 22500, 30000, 45000, 60000, 90000, 150000, int.MaxValue }; StatsCounter <TokenType> tokensPerCopyFileCounter = new StatsCounter <TokenType>(tokensCountPerCopyFileDistributionCategories); // - number of compiler directives per copy file long[] compilerDirectivesPerCopyFileCountDistributionCategories = { 0, 5, 10, 15, 20, 30, 50, 75, 100, 200, int.MaxValue }; StatsCounter <CompilerDirectiveType> compilerDirectivesPerCopyFileCounter = new StatsCounter <CompilerDirectiveType>(compilerDirectivesPerCopyFileCountDistributionCategories); // 4. Language models // - language model to predict the next word in a program LanguageModelGenerator languageModelForProgram = new LanguageModelGenerator(); // - language model to predict the next word in a copy LanguageModelGenerator languageModelForCopy = new LanguageModelGenerator(); // -- Compile and compute stats -- foreach (string textName in textNames) { console.Write(textName + " : compilation ... "); int programCopiesNotFound = 0; try { // Compile program FileCompiler fileCompiler = new FileCompiler(null, textName, project.SourceFileProvider, project, project.ColumnsLayout, project.CompilationOptions.Clone(), null, false, project); fileCompiler.CompileOnce(); CompilationUnit compilationResult = fileCompiler.CompilationResultsForProgram; programCopiesNotFound = 0; // Compute stats console.Write(" OK, compute stats ... "); // STATS for PROGRAM linesCounter.OnBeginProgram(); tokensCounter.OnBeginProgram(); copiesCounter.OnBeginProgram(); replacedTokensCounter.OnBeginProgram(); codeElementCounter.OnBeginProgram(); linesPerProgramFileCounter.OnBeginProgram(); tokensPerProgramFileCounter.OnBeginProgram(); compilerDirectivesPerProgramFileCounter.OnBeginProgram(); languageModelForProgram.OnBeginProgram(); // Iterate over program file lines foreach (var line in compilationResult.CodeElementsDocumentSnapshot.Lines) { // + count lines linesCounter.OnElement((int)line.Type); linesPerProgramFileCounter.OnElement((int)line.Type); // Use symbol information known at parsing time for the tokens to build a language model if (line.CodeElements != null) { foreach (var codeElement in line.CodeElements) { if (codeElement.SymbolInformationForTokens.Count > 0) { languageModelForProgram.AddSymbolInformationForTokens(codeElement.SymbolInformationForTokens); } } } if (line.ImportedDocuments != null) { var symbolInformationForTokens = new Dictionary <Token, SymbolInformation>(); foreach (var copyDirective in line.ImportedDocuments.Keys) { if (copyDirective.TextNameSymbol != null) { symbolInformationForTokens.Add(copyDirective.TextNameSymbol, new ExternalName(new AlphanumericValue(copyDirective.TextNameSymbol), SymbolType.TextName)); } if (copyDirective.LibraryNameSymbol != null) { symbolInformationForTokens.Add(copyDirective.TextNameSymbol, new ExternalName(new AlphanumericValue(copyDirective.LibraryNameSymbol), SymbolType.LibraryName)); } } languageModelForProgram.AddSymbolInformationForTokens(symbolInformationForTokens); } // Iterate over tokens on this line foreach (var token in line.SourceTokens) { // + count tokens and build language model tokensPerProgramFileCounter.OnElement((int)token.TokenType); languageModelForProgram.OnToken(token); } // Iterate over compiler directives on this line if (line.HasCompilerDirectives) { foreach (var token in line.TokensWithCompilerDirectives) { CompilerDirectiveToken compilerDirectiveToken = token as CompilerDirectiveToken; if (compilerDirectiveToken != null) { compilerDirectivesPerProgramFileCounter.OnElement((int)compilerDirectiveToken.CompilerDirective.Type); } } } // Iterate over COPY directives on this line if (line.ImportedDocuments != null) { foreach (CopyDirective copyDirective in line.ImportedDocuments.Keys) { // + count COPY directives CopyDirectiveType copyDirectiveType = CopyDirectiveType.Copy; #if EUROINFO_RULES if (copyDirective.InsertSuffixChar) { copyDirectiveType = CopyDirectiveType.CopyReplacingRemarks; } else if (copyDirective.RemoveFirst01Level) { copyDirectiveType = CopyDirectiveType.CopyRemarks; } #endif if (copyDirective.ReplaceOperations != null && copyDirective.ReplaceOperations.Count > 0) { copyDirectiveType = CopyDirectiveType.CopyReplacing; } copiesCounter.OnElement((int)copyDirectiveType); var importedDocument = line.ImportedDocuments[copyDirective]; if (importedDocument == null) { // + count missing COPY files for this program programCopiesNotFound++; } else { // + count references to copy files // AND check if copy file has already been analyzed string copyFileReference = copyDirective.LibraryName + ":" + copyDirective.TextName; if (copyFileReferenceCount.ContainsKey(copyFileReference)) { copyFileReferenceCount[copyFileReference] = copyFileReferenceCount[copyFileReference] + 1; // Iterate over copy file lines foreach (var copyLine in importedDocument.SourceDocument.Lines) { // + count lines inside COPY file linesCounter.OnElement((int)copyLine.Type); linesPerCopyFileCounter.OnElement((int)copyLine.Type); } } else { copyFileReferenceCount.Add(copyFileReference, 1); // STATS FOR COPY linesPerCopyFileCounter.OnBeginProgram(); tokensPerCopyFileCounter.OnBeginProgram(); compilerDirectivesPerCopyFileCounter.OnBeginProgram(); languageModelForCopy.OnBeginProgram(); // Iterate over copy file lines foreach (var copyLine in importedDocument.SourceDocument.Lines) { // + count lines inside COPY file linesCounter.OnElement((int)copyLine.Type); linesPerCopyFileCounter.OnElement((int)copyLine.Type); // Use symbol information known at parsing time for the tokens to build a language model if (copyLine.ImportedDocuments != null) { var symbolInformationForTokens = new Dictionary <Token, SymbolInformation>(); foreach (var copyDirective2 in line.ImportedDocuments.Keys) { if (copyDirective2.TextNameSymbol != null) { symbolInformationForTokens.Add(copyDirective2.TextNameSymbol, new ExternalName(new AlphanumericValue(copyDirective2.TextNameSymbol), SymbolType.TextName)); } if (copyDirective2.LibraryNameSymbol != null) { symbolInformationForTokens.Add(copyDirective2.TextNameSymbol, new ExternalName(new AlphanumericValue(copyDirective2.LibraryNameSymbol), SymbolType.LibraryName)); } } languageModelForCopy.AddSymbolInformationForTokens(symbolInformationForTokens); } // Iterate over tokens on this line foreach (var token in copyLine.SourceTokens) { // + count tokens and build language model tokensPerCopyFileCounter.OnElement((int)token.TokenType); languageModelForCopy.OnToken(token); } // Iterate over compiler directives on this line if (copyLine.HasCompilerDirectives) { foreach (var token in copyLine.TokensWithCompilerDirectives) { CompilerDirectiveToken compilerDirectiveToken = token as CompilerDirectiveToken; if (compilerDirectiveToken != null) { compilerDirectivesPerCopyFileCounter.OnElement((int)compilerDirectiveToken.CompilerDirective.Type); } } } } linesPerCopyFileCounter.OnEndProgram(); tokensPerCopyFileCounter.OnEndProgram(); compilerDirectivesPerCopyFileCounter.OnEndProgram(); } } } } // Iterate over code elements on this line if (line.CodeElements != null) { foreach (var codeElement in line.CodeElements) { codeElementCounter.OnElement((int)codeElement.Type); } } } // Iterate over tokens AFTER preprocessing ITokensLinesIterator processedTokensIterator = compilationResult.ProcessedTokensDocumentSnapshot.ProcessedTokens; Token processedToken = null; while ((processedToken = processedTokensIterator.NextToken()) != Token.END_OF_FILE) { tokensCounter.OnElement((int)processedToken.TokenType); ReplacedToken replacedToken = processedToken as ReplacedToken; if (replacedToken != null) { replacedTokensCounter.OnElement((int)replacedToken.OriginalToken.TokenType); } else if (processedToken is ReplacedTokenGroup) { replacedTokensCounter.OnElement((int)TokenType.ContinuationTokenGroup); } } linesCounter.OnEndProgram(); tokensCounter.OnEndProgram(); copiesCounter.OnEndProgram(); replacedTokensCounter.OnEndProgram(); codeElementCounter.OnEndProgram(); linesPerProgramFileCounter.OnEndProgram(); tokensPerProgramFileCounter.OnEndProgram(); compilerDirectivesPerProgramFileCounter.OnEndProgram(); } catch (Exception e) { console.WriteLine("ERROR :"); console.WriteLine(e.Message); } finally { console.Write("FINISHED"); if (programCopiesNotFound == 0) { console.WriteLine(); } else { console.WriteLine(" (" + programCopiesNotFound + " missing COPY)"); } } } // Compute language models languageModelForProgram.ComputeProbabilities(); languageModelForCopy.ComputeProbabilities(); // Write results files console.WriteLine(""); console.WriteLine("Writing statistics results to " + countersFile); using (StreamWriter writer = new StreamWriter(countersFile)) { writer.WriteLine("1. Program analysis after preprocessing"); writer.WriteLine(); WriteTitle(writer, "Total number of lines per program (including expanded COPY directives)"); linesCounter.DisplayResults(writer); WriteTitle(writer, "Total number of tokens per program (after preprocessing)"); tokensCounter.DisplayResults(writer); WriteTitle(writer, "Number of copies per program"); copiesCounter.DisplayResults(writer); WriteTitle(writer, "Number of replaced tokens per program"); replacedTokensCounter.DisplayResults(writer); WriteTitle(writer, "Number of code elements per program"); codeElementCounter.DisplayResults(writer); writer.WriteLine("2. Program files before preprocessing"); writer.WriteLine(); WriteTitle(writer, "Number of lines per program file (before preprocessing)"); linesPerProgramFileCounter.DisplayResults(writer); WriteTitle(writer, "Number of tokens per program file (before preprocessing)"); tokensPerProgramFileCounter.DisplayResults(writer); WriteTitle(writer, "Number of compiler directives per program file"); compilerDirectivesPerProgramFileCounter.DisplayResults(writer); writer.WriteLine("3. Copy files before preprocessing"); writer.WriteLine(); WriteTitle(writer, "Number of references to each copy file"); // copyFileReferenceCount = new Dictionary<string, int>() WriteTitle(writer, "Number of lines per copy file (before preprocessing)"); linesPerCopyFileCounter.DisplayResults(writer); WriteTitle(writer, "Number of tokens per copy file (before preprocessing)"); tokensPerCopyFileCounter.DisplayResults(writer); WriteTitle(writer, "Number of compiler directives per copy file"); compilerDirectivesPerCopyFileCounter.DisplayResults(writer); } console.WriteLine("Done"); console.WriteLine("Writing language model for program to " + languageModelForProgramFile); using (StreamWriter writer = new StreamWriter(languageModelForProgramFile)) { languageModelForProgram.WriteModelFile(writer, console); } console.WriteLine("Writing language model for copy to " + languageModelForCopyFile); using (StreamWriter writer = new StreamWriter(languageModelForCopyFile)) { languageModelForCopy.WriteModelFile(writer, console); } }