/// <summary>
/// Renders a processed tokens document as text for test comparison: first every token
/// (grouped under document-path and line headers), then the preprocessor diagnostics
/// section — the latter only when at least one diagnostic exists.
/// </summary>
private static string ProcessTokensDocument(string testName, ProcessedTokensDocument processedDoc)
{
    // -- Tokens --
    StringBuilder tokenText = new StringBuilder();
    ITokensLinesIterator tokenIterator = processedDoc.ProcessedTokens;
    string currentDocumentPath = null;
    int currentLineIndex = -1;
    // Pull tokens until END_OF_FILE; headers are emitted lazily, only when the
    // document path or line index changes (so an empty document prints nothing).
    for (Token currentToken = tokenIterator.NextToken(); currentToken != Token.END_OF_FILE; currentToken = tokenIterator.NextToken())
    {
        if (tokenIterator.DocumentPath != currentDocumentPath)
        {
            currentDocumentPath = tokenIterator.DocumentPath;
            tokenText.AppendLine("** Document path " + currentDocumentPath + " **");
        }
        if (tokenIterator.LineIndex != currentLineIndex)
        {
            currentLineIndex = tokenIterator.LineIndex;
            // Line headers are 1-based for readability, iterator index is 0-based
            tokenText.AppendLine("-- Line " + (currentLineIndex + 1) + " --");
        }
        tokenText.AppendLine(currentToken.ToString());
    }

    // -- Errors --
    StringBuilder diagnosticsText = new StringBuilder();
    diagnosticsText.AppendLine();
    diagnosticsText.AppendLine("++ Preprocessor diagnostics ++");
    bool foundDiagnostic = false;
    int lineNumber = 1;
    foreach (var line in processedDoc.Lines)
    {
        if (line.PreprocessorDiagnostics != null)
        {
            diagnosticsText.AppendLine("-- Line " + lineNumber + " --");
            foreach (Diagnostic diagnostic in line.PreprocessorDiagnostics)
            {
                foundDiagnostic = true;
                diagnosticsText.AppendLine(diagnostic.ToString());
            }
        }
        lineNumber++;
    }
    // The diagnostics section is appended only when a diagnostic was actually found
    return tokenText.ToString() + (foundDiagnostic ? diagnosticsText.ToString() : "");
}
/// <summary>
/// Implement COPY REPLACING on top of an underlying tokens line iterator.
/// </summary>
/// <param name="sourceIterator">Iterator producing the tokens to transform</param>
/// <param name="copyReplacingDirective">COPY directive carrying the REPLACING operations</param>
public ReplaceTokensLinesIterator(ITokensLinesIterator sourceIterator, CopyDirective copyReplacingDirective)
{
    this.sourceIterator = sourceIterator;
    this.CopyReplacingDirective = copyReplacingDirective;
    var replaceOperations = copyReplacingDirective.ReplaceOperations;
    if (replaceOperations.Count == 1)
    {
        // Single-operation fast path: store the operation directly
        currentPosition.ReplaceOperation = replaceOperations[0];
    }
    else if (replaceOperations.Count > 1)
    {
        currentPosition.ReplaceOperations = replaceOperations;
    }
    // No operation stored when the directive carries no REPLACING clause
}
/// <summary>
/// Implement COPY REPLACING on top of an underlying tokens line iterator
/// </summary>
/// <param name="sourceIterator">Iterator producing the tokens to transform</param>
/// <param name="copyReplacingDirective">COPY directive carrying the REPLACING operations</param>
public ReplaceTokensLinesIterator(ITokensLinesIterator sourceIterator, CopyDirective copyReplacingDirective)
{
    this.sourceIterator = sourceIterator;
    this.CopyReplacingDirective = copyReplacingDirective;
    if (copyReplacingDirective.ReplaceOperations.Count > 0)
    {
        if (copyReplacingDirective.ReplaceOperations.Count == 1)
        {
            // Single-operation fast path: store the operation directly
            currentPosition.ReplaceOperation = copyReplacingDirective.ReplaceOperations[0];
        }
        else
        {
            currentPosition.ReplaceOperations = copyReplacingDirective.ReplaceOperations;
        }
    }
}
/// <summary>
/// Iterator over the tokens contained in this imported document after
/// - REPLACING directive processing if necessary
/// </summary>
/// <returns>Either the raw processed-tokens iterator, or a ReplaceTokensLinesIterator
/// wrapping it when token replacement is required for this COPY.</returns>
public ITokensLinesIterator GetProcessedTokensIterator()
{
    ITokensLinesIterator sourceIterator = ProcessedTokensDocument.GetProcessedTokensIterator(SourceDocument.TextSourceInfo, SourceDocument.Lines);
    if (HasReplacingDirective
#if EUROINFO_LEGACY_REPLACING_SYNTAX
        // Legacy builds also need replacement when the COPY removes the first 01 level
        // or inserts a suffix char, even without an explicit REPLACING clause
        || CopyDirective.RemoveFirst01Level || CopyDirective.InsertSuffixChar
#endif
        )
    {
        // Wrap the source iterator to apply COPY REPLACING on the fly
        ITokensLinesIterator replaceIterator = new ReplaceTokensLinesIterator(sourceIterator, CopyDirective);
        return(replaceIterator);
    }
    else
    {
        return(sourceIterator);
    }
}
/// <summary>
/// Iterator over the tokens contained in this imported document after
/// - REPLACING directive processing if necessary
/// </summary>
/// <returns>Either the raw processed-tokens iterator, or a ReplaceTokensLinesIterator
/// wrapping it when token replacement is required for this COPY.</returns>
public ITokensLinesIterator GetProcessedTokensIterator()
{
    ITokensLinesIterator sourceIterator = ProcessedTokensDocument.GetProcessedTokensIterator(SourceDocument.TextSourceInfo, SourceDocument.Lines, this.CompilerOptions);
    if (HasReplacingDirective
#if EUROINFO_RULES
        // EI builds also need replacement when the legacy replacing syntax option is on
        // and the COPY removes the first 01 level or inserts a suffix char
        || (this.CompilerOptions.UseEuroInformationLegacyReplacingSyntax && (this.CopyDirective.RemoveFirst01Level || CopyDirective.InsertSuffixChar))
#endif
        )
    {
        // Wrap the source iterator to apply COPY REPLACING on the fly
        ITokensLinesIterator replaceIterator = new ReplaceTokensLinesIterator(sourceIterator, CopyDirective, CompilerOptions);
        return(replaceIterator);
    }
    else
    {
        return(sourceIterator);
    }
}
/// <summary>
/// Starts collecting parsing statistics for one file: creates a fresh ParsedFileInfo
/// and, when a token-counting iterator is supplied (CodeElementsParser only),
/// pre-counts the tokens and distinct token lines of the file.
/// </summary>
public void BeginParsingFile(TextSourceInfo textSourceInfo, ITokensLinesIterator tokensCountIterator)
{
    CurrentFileInfo = new ParsedFileInfo(textSourceInfo.Name, parserRulesCount, parserDecisionsCount);
    // Only for CodeElementsParser — other parsers pass null
    if (tokensCountIterator == null)
    {
        return;
    }
    ITokensLine previousLine = null;
    for (Token currentToken = tokensCountIterator.NextToken(); currentToken != Token.END_OF_FILE; currentToken = tokensCountIterator.NextToken())
    {
        CurrentFileInfo.TokensCount++;
        // A new line is counted each time the token's owning line changes
        if (currentToken.TokensLine != previousLine)
        {
            CurrentFileInfo.LinesCount++;
            previousLine = currentToken.TokensLine;
        }
    }
}
/// <summary>
/// ANTLR token factory entry point used to materialize missing tokens during error
/// recovery. Anchors the missing token on the iterator's current line, or — when the
/// iterator has already returned EOF — at the end of its last line.
/// </summary>
public IToken Create(Tuple <ITokenSource, ICharStream> source, int type, string text, int channel, int start, int stop, int line, int charPositionInLine)
{
    // ANTLR may request a missing token with no text
    string tokenText = text ?? String.Empty;
    ITokensLinesIterator iterator = ((TokensLinesTokenSource)source.Item1).TokensIterator;
    var lineForMissingToken = iterator.CurrentLine;
    if (lineForMissingToken == null)
    {
        // iterator already returned EOF -> attach the token to the last line, at its end
        lineForMissingToken = iterator.LastLine;
        charPositionInLine = lineForMissingToken.Length;
    }
    Token missingToken = new MissingToken((TokenType)type, tokenText, lineForMissingToken, charPositionInLine);
    missingToken.SetAntlrSource(source.Item1);
    missingToken.Channel = channel;
    SetTokenLiteralValue(missingToken, tokenText);
    return missingToken;
}
/// <summary>
/// Implement REPLACE directives on top of a CopyTokensLinesIterator
/// </summary>
/// <param name="sourceIterator">Underlying iterator whose tokens will be transformed
/// according to REPLACE directives encountered in the token stream</param>
public ReplaceTokensLinesIterator(ITokensLinesIterator sourceIterator)
{
    this.sourceIterator = sourceIterator;
}
/// <summary>
/// Wraps a TypeCobol tokens line iterator as an ANTLR-compatible token source.
/// </summary>
/// <param name="sourceFileName">Name of the source file, reported to ANTLR</param>
/// <param name="tokensIterator">Iterator supplying the tokens to feed the parser</param>
public TokensLinesTokenSource(string sourceFileName, ITokensLinesIterator tokensIterator)
{
    this.sourceFileName = sourceFileName;
    this.tokensIterator = tokensIterator;
}
/// <summary>
/// Compiles each program named in <paramref name="textNames"/> and aggregates corpus
/// statistics (line/token/copy/code-element distributions before and after preprocessing)
/// plus two n-gram language models, then writes the results to the given output files.
/// </summary>
/// <param name="project">Compilation project providing source files and options</param>
/// <param name="textNames">Names of the programs to compile and analyze</param>
/// <param name="console">Progress / error output</param>
/// <param name="countersFile">Path of the statistics report file</param>
/// <param name="languageModelForProgramFile">Path of the program language model file</param>
/// <param name="languageModelForCopyFile">Path of the copy language model file</param>
public static void GenerateStatisticsForPrograms(CompilationProject project, IEnumerable <string> textNames, TextWriter console, string countersFile, string languageModelForProgramFile, string languageModelForCopyFile)
{
    // Initialize statistics vars

    // 1. Program analysis after preprocessing
    // - total number of lines per program (including expanded COPY directives)
    long[] linesCountDistributionCategories = { 500, 1000, 1500, 2000, 3000, 5000, 7500, 10000, 15000, 20000, 30000, 50000, int.MaxValue };
    StatsCounter <CobolTextLineType> linesCounter = new StatsCounter <CobolTextLineType>(linesCountDistributionCategories);
    // - total number of tokens per program (after preprocessing)
    long[] tokensCountDistributionCategories = { 1500, 3000, 4500, 6000, 9000, 15000, 22500, 30000, 45000, 60000, 90000, 150000, int.MaxValue };
    StatsCounter <TokenType> tokensCounter = new StatsCounter <TokenType>(tokensCountDistributionCategories);
    // - number of copies per program
    long[] copiesCountDistributionCategories = { 0, 5, 10, 15, 20, 30, 50, int.MaxValue };
    StatsCounter <CopyDirectiveType> copiesCounter = new StatsCounter <CopyDirectiveType>(copiesCountDistributionCategories);
    // - number of replaced tokens per program
    long[] replacedTokensCountDistributionCategories = { 50, 100, 150, 200, 300, 500, 1000, 2000, 5000, 10000, 20000, int.MaxValue };
    // FIX: was constructed with tokensCountDistributionCategories, which left the
    // replaced-tokens categories declared just above completely unused
    StatsCounter <TokenType> replacedTokensCounter = new StatsCounter <TokenType>(replacedTokensCountDistributionCategories);
    // - number of code elements per program
    long[] codeElementsCountDistributionCategories = { 100, 200, 300, 400, 500, 750, 1000, 1500, 2000, 5000, 10000, int.MaxValue };
    StatsCounter <CodeElementType> codeElementCounter = new StatsCounter <CodeElementType>(codeElementsCountDistributionCategories);

    // 2. Program files before preprocessing
    // - number of lines per program file (before preprocessing)
    long[] linesCountPerProgramFileDistributionCategories = { 25, 50, 100, 150, 200, 300, 500, 1000, 1500, 2000, 3000, int.MaxValue };
    StatsCounter <CobolTextLineType> linesPerProgramFileCounter = new StatsCounter <CobolTextLineType>(linesCountPerProgramFileDistributionCategories);
    // - number of tokens per program file (before preprocessing)
    long[] tokensCountPerProgramFileDistributionCategories = { 1500, 3000, 4500, 6000, 9000, 15000, 22500, 30000, 45000, 60000, 90000, 150000, int.MaxValue };
    StatsCounter <TokenType> tokensPerProgramFileCounter = new StatsCounter <TokenType>(tokensCountPerProgramFileDistributionCategories);
    // - number of compiler directives per program file
    long[] compilerDirectivesPerProgramFileCountDistributionCategories = { 0, 5, 10, 15, 20, 30, 50, 75, 100, 200, int.MaxValue };
    StatsCounter <CompilerDirectiveType> compilerDirectivesPerProgramFileCounter = new StatsCounter <CompilerDirectiveType>(compilerDirectivesPerProgramFileCountDistributionCategories);

    // 3. Copy files before preprocessing
    // - number of references to each copy file
    IDictionary <string, int> copyFileReferenceCount = new Dictionary <string, int>();
    // - number of lines per copy file (before preprocessing)
    long[] linesCountPerCopyFileDistributionCategories = { 25, 50, 100, 150, 200, 300, 500, 1000, 1500, 2000, 3000, int.MaxValue };
    StatsCounter <CobolTextLineType> linesPerCopyFileCounter = new StatsCounter <CobolTextLineType>(linesCountPerCopyFileDistributionCategories);
    // - number of tokens per copy file (before preprocessing)
    long[] tokensCountPerCopyFileDistributionCategories = { 1500, 3000, 4500, 6000, 9000, 15000, 22500, 30000, 45000, 60000, 90000, 150000, int.MaxValue };
    StatsCounter <TokenType> tokensPerCopyFileCounter = new StatsCounter <TokenType>(tokensCountPerCopyFileDistributionCategories);
    // - number of compiler directives per copy file
    long[] compilerDirectivesPerCopyFileCountDistributionCategories = { 0, 5, 10, 15, 20, 30, 50, 75, 100, 200, int.MaxValue };
    StatsCounter <CompilerDirectiveType> compilerDirectivesPerCopyFileCounter = new StatsCounter <CompilerDirectiveType>(compilerDirectivesPerCopyFileCountDistributionCategories);

    // 4. Language models
    // - language model to predict the next word in a program
    LanguageModelGenerator languageModelForProgram = new LanguageModelGenerator();
    // - language model to predict the next word in a copy
    LanguageModelGenerator languageModelForCopy = new LanguageModelGenerator();

    // -- Compile and compute stats --
    foreach (string textName in textNames)
    {
        console.Write(textName + " : compilation ... ");
        int programCopiesNotFound = 0;
        try
        {
            // Compile program
            FileCompiler fileCompiler = new FileCompiler(null, textName, project.SourceFileProvider, project, project.ColumnsLayout, project.CompilationOptions.Clone(), null, false, project);
            fileCompiler.CompileOnce();
            CompilationUnit compilationResult = fileCompiler.CompilationResultsForProgram;
            programCopiesNotFound = 0;

            // Compute stats
            console.Write(" OK, compute stats ... ");

            // STATS for PROGRAM
            linesCounter.OnBeginProgram();
            tokensCounter.OnBeginProgram();
            copiesCounter.OnBeginProgram();
            replacedTokensCounter.OnBeginProgram();
            codeElementCounter.OnBeginProgram();
            linesPerProgramFileCounter.OnBeginProgram();
            tokensPerProgramFileCounter.OnBeginProgram();
            compilerDirectivesPerProgramFileCounter.OnBeginProgram();
            languageModelForProgram.OnBeginProgram();

            // Iterate over program file lines
            foreach (var line in compilationResult.CodeElementsDocumentSnapshot.Lines)
            {
                // + count lines
                linesCounter.OnElement((int)line.Type);
                linesPerProgramFileCounter.OnElement((int)line.Type);

                // Use symbol information known at parsing time for the tokens to build a language model
                if (line.CodeElements != null)
                {
                    foreach (var codeElement in line.CodeElements)
                    {
                        if (codeElement.SymbolInformationForTokens.Count > 0)
                        {
                            languageModelForProgram.AddSymbolInformationForTokens(codeElement.SymbolInformationForTokens);
                        }
                    }
                }
                if (line.ImportedDocuments != null)
                {
                    var symbolInformationForTokens = new Dictionary <Token, SymbolInformation>();
                    foreach (var copyDirective in line.ImportedDocuments.Keys)
                    {
                        if (copyDirective.TextNameSymbol != null)
                        {
                            symbolInformationForTokens.Add(copyDirective.TextNameSymbol, new ExternalName(new AlphanumericValue(copyDirective.TextNameSymbol), SymbolType.TextName));
                        }
                        if (copyDirective.LibraryNameSymbol != null)
                        {
                            // FIX: the key was copyDirective.TextNameSymbol, which throws
                            // ArgumentException (duplicate key) whenever a COPY has both a
                            // text name and a library name; the library entry must be keyed
                            // by the library name token
                            symbolInformationForTokens.Add(copyDirective.LibraryNameSymbol, new ExternalName(new AlphanumericValue(copyDirective.LibraryNameSymbol), SymbolType.LibraryName));
                        }
                    }
                    languageModelForProgram.AddSymbolInformationForTokens(symbolInformationForTokens);
                }

                // Iterate over tokens on this line
                foreach (var token in line.SourceTokens)
                {
                    // + count tokens and build language model
                    tokensPerProgramFileCounter.OnElement((int)token.TokenType);
                    languageModelForProgram.OnToken(token);
                }

                // Iterate over compiler directives on this line
                if (line.HasCompilerDirectives)
                {
                    foreach (var token in line.TokensWithCompilerDirectives)
                    {
                        CompilerDirectiveToken compilerDirectiveToken = token as CompilerDirectiveToken;
                        if (compilerDirectiveToken != null)
                        {
                            compilerDirectivesPerProgramFileCounter.OnElement((int)compilerDirectiveToken.CompilerDirective.Type);
                        }
                    }
                }

                // Iterate over COPY directives on this line
                if (line.ImportedDocuments != null)
                {
                    foreach (CopyDirective copyDirective in line.ImportedDocuments.Keys)
                    {
                        // + count COPY directives
                        CopyDirectiveType copyDirectiveType = CopyDirectiveType.Copy;
#if EUROINFO_RULES
                        if (copyDirective.InsertSuffixChar)
                        {
                            copyDirectiveType = CopyDirectiveType.CopyReplacingRemarks;
                        }
                        else if (copyDirective.RemoveFirst01Level)
                        {
                            copyDirectiveType = CopyDirectiveType.CopyRemarks;
                        }
#endif
                        if (copyDirective.ReplaceOperations != null && copyDirective.ReplaceOperations.Count > 0)
                        {
                            copyDirectiveType = CopyDirectiveType.CopyReplacing;
                        }
                        copiesCounter.OnElement((int)copyDirectiveType);

                        var importedDocument = line.ImportedDocuments[copyDirective];
                        if (importedDocument == null)
                        {
                            // + count missing COPY files for this program
                            programCopiesNotFound++;
                        }
                        else
                        {
                            // + count references to copy files
                            // AND check if copy file has already been analyzed
                            string copyFileReference = copyDirective.LibraryName + ":" + copyDirective.TextName;
                            if (copyFileReferenceCount.ContainsKey(copyFileReference))
                            {
                                copyFileReferenceCount[copyFileReference] = copyFileReferenceCount[copyFileReference] + 1;
                                // Iterate over copy file lines
                                foreach (var copyLine in importedDocument.SourceDocument.Lines)
                                {
                                    // + count lines inside COPY file
                                    linesCounter.OnElement((int)copyLine.Type);
                                    linesPerCopyFileCounter.OnElement((int)copyLine.Type);
                                }
                            }
                            else
                            {
                                copyFileReferenceCount.Add(copyFileReference, 1);

                                // STATS FOR COPY
                                linesPerCopyFileCounter.OnBeginProgram();
                                tokensPerCopyFileCounter.OnBeginProgram();
                                compilerDirectivesPerCopyFileCounter.OnBeginProgram();
                                languageModelForCopy.OnBeginProgram();

                                // Iterate over copy file lines
                                foreach (var copyLine in importedDocument.SourceDocument.Lines)
                                {
                                    // + count lines inside COPY file
                                    linesCounter.OnElement((int)copyLine.Type);
                                    linesPerCopyFileCounter.OnElement((int)copyLine.Type);

                                    // Use symbol information known at parsing time for the tokens to build a language model
                                    if (copyLine.ImportedDocuments != null)
                                    {
                                        var symbolInformationForTokens = new Dictionary <Token, SymbolInformation>();
                                        // FIX: iterated line.ImportedDocuments.Keys (the outer
                                        // program line) instead of the copy line being analyzed
                                        foreach (var copyDirective2 in copyLine.ImportedDocuments.Keys)
                                        {
                                            if (copyDirective2.TextNameSymbol != null)
                                            {
                                                symbolInformationForTokens.Add(copyDirective2.TextNameSymbol, new ExternalName(new AlphanumericValue(copyDirective2.TextNameSymbol), SymbolType.TextName));
                                            }
                                            if (copyDirective2.LibraryNameSymbol != null)
                                            {
                                                // FIX: same duplicate-key bug as above — key by the library name token
                                                symbolInformationForTokens.Add(copyDirective2.LibraryNameSymbol, new ExternalName(new AlphanumericValue(copyDirective2.LibraryNameSymbol), SymbolType.LibraryName));
                                            }
                                        }
                                        languageModelForCopy.AddSymbolInformationForTokens(symbolInformationForTokens);
                                    }

                                    // Iterate over tokens on this line
                                    foreach (var token in copyLine.SourceTokens)
                                    {
                                        // + count tokens and build language model
                                        tokensPerCopyFileCounter.OnElement((int)token.TokenType);
                                        languageModelForCopy.OnToken(token);
                                    }

                                    // Iterate over compiler directives on this line
                                    if (copyLine.HasCompilerDirectives)
                                    {
                                        foreach (var token in copyLine.TokensWithCompilerDirectives)
                                        {
                                            CompilerDirectiveToken compilerDirectiveToken = token as CompilerDirectiveToken;
                                            if (compilerDirectiveToken != null)
                                            {
                                                compilerDirectivesPerCopyFileCounter.OnElement((int)compilerDirectiveToken.CompilerDirective.Type);
                                            }
                                        }
                                    }
                                }
                                linesPerCopyFileCounter.OnEndProgram();
                                tokensPerCopyFileCounter.OnEndProgram();
                                compilerDirectivesPerCopyFileCounter.OnEndProgram();
                            }
                        }
                    }
                }

                // Iterate over code elements on this line
                if (line.CodeElements != null)
                {
                    foreach (var codeElement in line.CodeElements)
                    {
                        codeElementCounter.OnElement((int)codeElement.Type);
                    }
                }
            }

            // Iterate over tokens AFTER preprocessing
            ITokensLinesIterator processedTokensIterator = compilationResult.ProcessedTokensDocumentSnapshot.ProcessedTokens;
            Token processedToken = null;
            while ((processedToken = processedTokensIterator.NextToken()) != Token.END_OF_FILE)
            {
                tokensCounter.OnElement((int)processedToken.TokenType);
                ReplacedToken replacedToken = processedToken as ReplacedToken;
                if (replacedToken != null)
                {
                    replacedTokensCounter.OnElement((int)replacedToken.OriginalToken.TokenType);
                }
                else if (processedToken is ReplacedTokenGroup)
                {
                    replacedTokensCounter.OnElement((int)TokenType.ContinuationTokenGroup);
                }
            }

            linesCounter.OnEndProgram();
            tokensCounter.OnEndProgram();
            copiesCounter.OnEndProgram();
            replacedTokensCounter.OnEndProgram();
            codeElementCounter.OnEndProgram();
            linesPerProgramFileCounter.OnEndProgram();
            tokensPerProgramFileCounter.OnEndProgram();
            compilerDirectivesPerProgramFileCounter.OnEndProgram();
        }
        catch (Exception e)
        {
            console.WriteLine("ERROR :");
            console.WriteLine(e.Message);
        }
        finally
        {
            console.Write("FINISHED");
            if (programCopiesNotFound == 0)
            {
                console.WriteLine();
            }
            else
            {
                console.WriteLine(" (" + programCopiesNotFound + " missing COPY)");
            }
        }
    }

    // Compute language models
    languageModelForProgram.ComputeProbabilities();
    languageModelForCopy.ComputeProbabilities();

    // Write results files
    console.WriteLine("");
    console.WriteLine("Writing statistics results to " + countersFile);
    using (StreamWriter writer = new StreamWriter(countersFile))
    {
        writer.WriteLine("1. Program analysis after preprocessing");
        writer.WriteLine();
        WriteTitle(writer, "Total number of lines per program (including expanded COPY directives)");
        linesCounter.DisplayResults(writer);
        WriteTitle(writer, "Total number of tokens per program (after preprocessing)");
        tokensCounter.DisplayResults(writer);
        WriteTitle(writer, "Number of copies per program");
        copiesCounter.DisplayResults(writer);
        WriteTitle(writer, "Number of replaced tokens per program");
        replacedTokensCounter.DisplayResults(writer);
        WriteTitle(writer, "Number of code elements per program");
        codeElementCounter.DisplayResults(writer);
        writer.WriteLine("2. Program files before preprocessing");
        writer.WriteLine();
        WriteTitle(writer, "Number of lines per program file (before preprocessing)");
        linesPerProgramFileCounter.DisplayResults(writer);
        WriteTitle(writer, "Number of tokens per program file (before preprocessing)");
        tokensPerProgramFileCounter.DisplayResults(writer);
        WriteTitle(writer, "Number of compiler directives per program file");
        compilerDirectivesPerProgramFileCounter.DisplayResults(writer);
        writer.WriteLine("3. Copy files before preprocessing");
        writer.WriteLine();
        WriteTitle(writer, "Number of references to each copy file");
        // FIX: the reference counts were collected but never written — the title was
        // emitted with no data underneath it
        foreach (var copyFileEntry in copyFileReferenceCount)
        {
            writer.WriteLine(copyFileEntry.Key + " : " + copyFileEntry.Value);
        }
        WriteTitle(writer, "Number of lines per copy file (before preprocessing)");
        linesPerCopyFileCounter.DisplayResults(writer);
        WriteTitle(writer, "Number of tokens per copy file (before preprocessing)");
        tokensPerCopyFileCounter.DisplayResults(writer);
        WriteTitle(writer, "Number of compiler directives per copy file");
        compilerDirectivesPerCopyFileCounter.DisplayResults(writer);
    }
    console.WriteLine("Done");
    console.WriteLine("Writing language model for program to " + languageModelForProgramFile);
    using (StreamWriter writer = new StreamWriter(languageModelForProgramFile))
    {
        languageModelForProgram.WriteModelFile(writer, console);
    }
    console.WriteLine("Writing language model for copy to " + languageModelForCopyFile);
    using (StreamWriter writer = new StreamWriter(languageModelForCopyFile))
    {
        languageModelForCopy.WriteModelFile(writer, console);
    }
}
/// <summary>
/// Incremental parsing of a set of processed tokens lines changes
/// </summary>
/// <returns>The list of code elements line changes produced by this incremental pass</returns>
internal static IList <DocumentChange <ICodeElementsLine> > ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <CodeElementsLine> documentLines, IList <DocumentChange <IProcessedTokensLine> > processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions)
{
    // Collect all changes applied to the processed tokens lines during the incremental scan
    IList <DocumentChange <ICodeElementsLine> > codeElementsLinesChanges = new List <DocumentChange <ICodeElementsLine> >();

    // There are 2 reasons to re-parse a tokens line after a change :
    // 1. The tokens line changed : these lines were already reset during the previous steps
    // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again

    // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed ---
    IList <ParseSection> refreshParseSections = null;

    // Iterate over all processed tokens changes detected by the PreprocessorStep :
    // - refresh all the adjacent lines participating in a CodeElement
    // - register the start and stop token for all sections of the document which need to be parsed again
    // FIX: guard against an empty change list before indexing [0] — the other overload
    // of this method already includes the Count > 0 check
    if (processedTokensLinesChanges != null && processedTokensLinesChanges.Count > 0)
    {
        // If the document was cleared, everything must be parsed again
        if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared)
        {
            refreshParseSections = new List <ParseSection>();
            ParseSection lastParseSection = null;
            foreach (DocumentChange <IProcessedTokensLine> tokensChange in processedTokensLinesChanges)
            {
                // Skip changes already covered by the previously computed parse section
                if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex)
                {
                    lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection);
                    refreshParseSections.Add(lastParseSection);
                }
            }
        }
    }

    // --- INITIALIZE ANTLR CodeElements parser ---

    // Create a token iterator on top of pre-processed tokens lines
    ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines);

    // Create an Antlr compatible token source on top of the token iterator
    TokensLinesTokenSource tokenSource = new TokensLinesTokenSource(
        textSourceInfo.Name, tokensIterator);

    // Init parser
    TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens);
    CodeElementsParser cobolParser = new CodeElementsParser(tokenStream);
    // REVERT TO STD PARSER ==> TracingCobolParser cobolParser = new TracingCobolParser(tokenStream);

    // Customize error recovery strategy
    IAntlrErrorStrategy cobolErrorStrategy = new CodeElementErrorStrategy();
    cobolParser.ErrorHandler = cobolErrorStrategy;

    // Register all parse errors in a list in memory
    ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener();
    cobolParser.RemoveErrorListeners();
    cobolParser.AddErrorListener(errorListener);

    // Prepare to analyze the parse tree
    ParseTreeWalker walker = new ParseTreeWalker();
    CodeElementBuilder codeElementBuilder = new CodeElementBuilder();
    codeElementBuilder.Dispatcher = new CodeElementDispatcher();
    codeElementBuilder.Dispatcher.CreateListeners();

    // --- INCREMENTAL PARSING ---

    // In case of incremental parsing, parse only the code sections we need to refresh
    IEnumerator <ParseSection> parseSectionsEnumerator = null;
    ParseSection currentParseSection = null;
    if (refreshParseSections != null)
    {
        // Get the first code section we need to refresh
        parseSectionsEnumerator = refreshParseSections.GetEnumerator();
        parseSectionsEnumerator.MoveNext();
        currentParseSection = parseSectionsEnumerator.Current;

        // Seek just before the next code element starting token
        tokenStream.SeekToToken(currentParseSection.StartToken);
        tokenStream.StartLookingForStopToken(currentParseSection.StopToken);
    }

    // Parse a list of code elements for each parse section while advancing in the underlying token stream
    do
    {
        // Reset parsing error diagnostics
        cobolErrorStrategy.Reset(cobolParser);

        // Try to parse code elements :
        // - starting with the current parse section Start token
        // - ending with the current parse section Stop token
        CodeElementsParser.CobolCodeElementsContext codeElementsParseTree = null;
        try
        {
            codeElementsParseTree = cobolParser.cobolCodeElements();
        }
        catch (Exception e)
        {
            // A crash inside ANTLR is reported as an implementation-error diagnostic
            // on the line of the token being processed, and parsing continues
            var currentToken = (Token)cobolParser.CurrentToken;
            CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(currentToken);
            codeElementsLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, currentToken, currentToken.Line, e));
        }

        if (codeElementsParseTree != null)
        {
            // If the parse tree is not empty
            if (codeElementsParseTree.codeElement() != null && codeElementsParseTree.codeElement().Length > 0)
            {
                // Analyze the parse tree for each code element
                foreach (var codeElementParseTree in codeElementsParseTree.codeElement())
                {
                    // Get the first line that was parsed
                    var tokenStart = (Token)codeElementParseTree.Start;
                    CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(tokenStart);

                    // Register that this line was updated
                    // COMMENTED FOR THE SAKE OF PERFORMANCE -- SEE ISSUE #160
                    //int updatedLineIndex = documentLines.IndexOf(codeElementsLine, codeElementsLine.InitialLineIndex);
                    //codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLineIndex, codeElementsLine));
                    codeElementsLinesChanges.Add(new DocumentChange <ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.InitialLineIndex, codeElementsLine));

                    // Visit the parse tree to build a first class object representing the code elements
                    try
                    {
                        walker.Walk(codeElementBuilder, codeElementParseTree);
                    }
                    catch (Exception ex)
                    {
                        var code = MessageCode.ImplementationError;
                        int line = 0;
                        int start = 0;
                        int stop = 0;
                        if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0)
                        {
                            start = codeElementsLine.SourceTokens[0].StartIndex;
                            stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count - 1].StopIndex;
                        }
                        codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code));
                    }
                    CodeElement codeElement = codeElementBuilder.CodeElement;
                    if (codeElement != null)
                    {
                        // Attach consumed tokens and main document line numbers information to the code element
                        if (codeElement.ConsumedTokens.Count == 0)
                        {
                            // ISSUE #204:
                            if (tokenStream.Lt(1) != null)
                            {
                                // if not end of file,
                                // add next token to ConsumedTokens to know where is the CodeElement in error
                                codeElement.ConsumedTokens.Add((Token)tokenStream.Lt(1));
                                // this alter CodeElements semantics: in addition to matched tokens,
                                // it includes the first token in error if no token has been matched
                            }
                        }

                        //TODO Issue #384 to discuss if this code should stay here:
                        //This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called
                        //Rule TCLIMITATION_NO_CE_ACROSS_SOURCES
                        if (codeElement.IsAcrossSourceFile())
                        {
                            DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation);
                        }

                        // Add code element to the list
                        codeElementsLine.AddCodeElement(codeElement);
                        if (codeElement.Diagnostics != null)
                        {
                            foreach (Diagnostic d in codeElement.Diagnostics)
                            {
                                codeElementsLine.AddParserDiagnostic(d);
                            }
                        }
                    }
                }
            }

            // If the parse tree contains errors
            if (codeElementsParseTree.Diagnostics != null)
            {
                foreach (ParserDiagnostic d in codeElementsParseTree.Diagnostics)
                {
                    if (d.OffendingSymbol != null)
                    {
                        CodeElementsLine codeElementsLine = GetCodeElementsLineForToken((Token)d.OffendingSymbol);
                        codeElementsLine.AddParserDiagnostic(d);
                    }
                }
            }
        }

        // In case of incremental parsing, directly jump to next parse section in the token stream
        // Else, simply start parsing the next CodeElement beginning with the next token
        if (currentParseSection != null)
        {
            // Advance to the next ParseSection
            if (parseSectionsEnumerator.MoveNext())
            {
                currentParseSection = parseSectionsEnumerator.Current;
                tokenStream.SeekToToken(currentParseSection.StartToken);
                tokenStream.StartLookingForStopToken(currentParseSection.StopToken);
            }
            // No more section to parse
            else
            {
                break;
            }
        }
    } while (tokenStream.La(1) >= 0);

    return(codeElementsLinesChanges);
}
/// <summary>
/// Implement REPLACE directives on top of a CopyTokensLinesIterator
/// </summary>
/// <param name="sourceIterator">Underlying iterator whose tokens will be transformed
/// according to REPLACE directives encountered in the token stream</param>
/// <param name="compilerOptions">Compiler options controlling replacement behavior</param>
public ReplaceTokensLinesIterator(ITokensLinesIterator sourceIterator, TypeCobolOptions compilerOptions)
{
    this.sourceIterator = sourceIterator;
    CompilerOptions = compilerOptions;
}
/// <summary>
/// Shortcut method : current line index in the main document being parsed
/// </summary>
/// <returns>Zero-based line index reported by the underlying tokens line iterator</returns>
protected int GetCurrentLineIndexInMainDocument()
{
    // Unwrap the ANTLR input stream down to the TypeCobol token iterator
    var tokenStream = (ITokenStream)InputStream;
    var tokenSource = (TokensLinesTokenSource)tokenStream.TokenSource;
    return tokenSource.TokensIterator.LineIndexInMainDocument;
}
/// <summary>
/// Get next token or EndOfFile
/// </summary>
/// <returns>The next token of the processed document: a token from the main document,
/// an ImportedToken when the iterator is currently expanding a COPY, or
/// Token.END_OF_FILE when the document is exhausted.</returns>
public Token NextToken()
{
    // If the document is empty or after end of file, immediately return EndOfFile
    if (currentLine == null)
    {
        currentPosition.CurrentToken = Token.END_OF_FILE;
        return(Token.END_OF_FILE);
    }

    // If the iterator is positioned in an imported document, return the next imported token
    if (currentPosition.ImportedDocumentIterator != null)
    {
        Token nextImportedToken = currentPosition.ImportedDocumentIterator.NextToken();
        if (nextImportedToken == Token.END_OF_FILE)
        {
            // Imported document exhausted: drop the nested iterator and fall through
            // to resume iteration in the main document
            currentPosition.ImportedDocumentIterator = null;
            currentPosition.ImportedDocumentIteratorPosition = null;
        }
        else
        {
            currentPosition.CurrentToken = nextImportedToken;
            //#235
            // Wrap the token so consumers can see which COPY directive it came from
            var copyDirective = (CopyDirective)((CompilerDirectiveToken)currentTokenInMainDocument).CompilerDirective;
            return(new ImportedToken(nextImportedToken, copyDirective));
        }
    }

    // While we can find a next token
    currentTokenInMainDocument = null;
    while (currentTokenInMainDocument == null)
    {
        // try to find the next token on the same line
        currentPosition.TokenIndexInLine++;
        // but if we reached the end of the current line ...
        while (currentPosition.TokenIndexInLine >= currentLine.TokensWithCompilerDirectives.Count)
        {
            // .. advance to next line
            currentPosition.LineIndex++;
            currentPosition.TokenIndexInLine = 0;
            if (currentPosition.LineIndex < tokensLines.Count)
            {
                currentLine = tokensLines[currentPosition.LineIndex];
            }
            // and if we reached the last line of the document ...
            else
            {
                // return EndOfFile
                currentLine = null;
                currentPosition.CurrentToken = Token.END_OF_FILE;
                return(Token.END_OF_FILE);
            }
        }
        // Check if the next token found matches the filter criteria or is a COPY compiler directive or is a REPLACE directive
        Token nextTokenCandidate = currentLine.TokensWithCompilerDirectives[currentPosition.TokenIndexInLine];
        if (nextTokenCandidate.Channel == channelFilter || nextTokenCandidate.TokenType == TokenType.CopyImportDirective || nextTokenCandidate.TokenType == TokenType.ReplaceDirective)
        {
            currentTokenInMainDocument = nextTokenCandidate;
        }
    }

    // Check if the next token is a COPY import compiler directive
    if (currentTokenInMainDocument.TokenType == TokenType.CopyImportDirective)
    {
        // Get next token in the imported document
        var compilerDirective = (CopyDirective)((CompilerDirectiveToken)currentTokenInMainDocument).CompilerDirective;
        ImportedTokensDocument importedDocument = currentLine.ImportedDocuments[compilerDirective];
        if (importedDocument != null)
        {
            ITokensLinesIterator importedDocumentIterator = importedDocument.GetProcessedTokensIterator();
            Token nextTokenCandidate = importedDocumentIterator.NextToken();
            // No suitable next token found in the imported document
            // -> get next token in the main document
            if (nextTokenCandidate == Token.END_OF_FILE)
            {
                // Recursive call: skips the empty COPY and continues after it
                return(NextToken());
            }
            // Start iterating in the imported document
            else
            {
                currentPosition.ImportedDocumentIterator = importedDocumentIterator;
                currentPosition.CurrentToken = nextTokenCandidate;
                //#235
                return(new ImportedToken(nextTokenCandidate, compilerDirective));
            }
        }
        // The reference to the ImportedDocument could not be resolved (error in an earlier phase)
        // -> get next token in the main document (fallback)
        else
        {
            return(NextToken());
        }
    }
    else
    {
        currentPosition.CurrentToken = currentTokenInMainDocument;
        return(currentTokenInMainDocument);
    }
}
/// <summary>
/// Incremental parsing of a set of processed tokens lines changes.
/// Computes which sections of the document must be re-parsed, resets the code elements
/// on those lines, runs the ANTLR CodeElements parser over the affected token range,
/// and rebuilds CodeElement objects (with diagnostics) on the touched lines.
/// </summary>
/// <param name="textSourceInfo">Identity of the source document being parsed</param>
/// <param name="documentLines">All lines of the document after the preprocessor step</param>
/// <param name="processedTokensLinesChanges">Changes detected by the PreprocessorStep; null or empty means full parse</param>
/// <param name="prepareDocumentLineForUpdate">Callback used to reset adjacent lines that must be refreshed</param>
/// <param name="compilerOptions">Compiler options (NOTE(review): not referenced in this method body — confirm it is still needed)</param>
/// <param name="perfStatsForParserInvocation">Collects timing stats for this parser invocation</param>
/// <returns>The list of line-level changes applied to the code elements lines</returns>
internal static IList<DocumentChange<ICodeElementsLine>> ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<CodeElementsLine> documentLines, IList<DocumentChange<IProcessedTokensLine>> processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, PerfStatsForParserInvocation perfStatsForParserInvocation)
{
    // Collect all changes applied to the processed tokens lines during the incremental scan
    IList<DocumentChange<ICodeElementsLine>> codeElementsLinesChanges = new List<DocumentChange<ICodeElementsLine>>();

    // There are 2 reasons to re-parse a tokens line after a change :
    // 1. The tokens line changed : these lines were already reset during the previous steps
    // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again

    // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed ---

    IList<ParseSection> refreshParseSections = null;
    ParseSection largestRefreshParseSection = null;

    // Iterate over all processed tokens changes detected by the PreprocessorStep :
    // - refresh all the adjacent lines participating in a CodeElement
    // - register the start and stop token for all sections of the document which need to be parsed again
    if (processedTokensLinesChanges != null && processedTokensLinesChanges.Count > 0)
    {
        // If the document was cleared, everything must be parsed again
        // (refreshParseSections stays null, which triggers a full parse below)
        if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared)
        {
            refreshParseSections = new List<ParseSection>();
            ParseSection lastParseSection = null;
            foreach (DocumentChange<IProcessedTokensLine> tokensChange in processedTokensLinesChanges)
            {
                // Skip changes already covered by the previously computed section
                if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex)
                {
                    lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection);
                    refreshParseSections.Add(lastParseSection);
                }
            }
        }
    }
    if (refreshParseSections != null)
    {
        // After getting all the parts refreshed, get the largest part that has been refreshed
        // (a single section spanning from the earliest start to the latest stop of all sections)
        var minParseSection = refreshParseSections.OrderBy(p => p.StartLineIndex).First();
        var maxParseSection = refreshParseSections.OrderByDescending(p => p.StopLineIndex).First();

        largestRefreshParseSection = new ParseSection(minParseSection.StartLineIndex, minParseSection.StartToken, maxParseSection.StopLineIndex, maxParseSection.StopToken, maxParseSection.StopTokenIsFirstTokenOfTheLine);
    }

    // --- INITIALIZE ANTLR CodeElements parser ---

    // Create a token iterator on top of pre-processed tokens lines
    ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines);

    // Create an Antlr compatible token source on top of the token iterator
    TokensLinesTokenSource tokenSource = new TokensLinesTokenSource(textSourceInfo.Name, tokensIterator);

    // Init parser
    TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens);
    CodeElementsParser cobolParser = new CodeElementsParser(tokenStream);
    // REVERT TO STD PARSER ==> TracingCobolParser cobolParser = new TracingCobolParser(tokenStream);

    // Optionnaly activate Antlr Parser performance profiling
    // WARNING : use this in a single-treaded context only (uses static field)
    if (AntlrPerformanceProfiler == null && perfStatsForParserInvocation.ActivateDetailedAntlrPofiling)
    {
        AntlrPerformanceProfiler = new AntlrPerformanceProfiler(cobolParser);
    }
    if (AntlrPerformanceProfiler != null)
    {
        // Replace the generated parser by a subclass which traces all rules invocations
        cobolParser = new CodeElementsTracingParser(tokenStream);

        // A second, independent iterator is used just to count tokens for profiling
        var tokensCountIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines);
        AntlrPerformanceProfiler.BeginParsingFile(textSourceInfo, tokensCountIterator);
    }

    // Customize error recovery strategy
    IAntlrErrorStrategy cobolErrorStrategy = new CodeElementErrorStrategy();
    cobolParser.ErrorHandler = cobolErrorStrategy;

    // Register all parse errors in a list in memory
    ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener();
    cobolParser.RemoveErrorListeners();
    cobolParser.AddErrorListener(errorListener);

    // Prepare to analyze the parse tree
    ParseTreeWalker walker = new ParseTreeWalker();
    CodeElementBuilder codeElementBuilder = new CodeElementBuilder();
    codeElementBuilder.Dispatcher = new CodeElementDispatcher();
    codeElementBuilder.Dispatcher.CreateListeners();

    // --- INCREMENTAL PARSING ---

    // In case of incremental parsing, parse only the code sections we need to refresh
    if (largestRefreshParseSection != null)
    {
        // Seek just before the next code element starting token
        tokenStream.SeekToToken(largestRefreshParseSection.StartToken);
        tokenStream.StartLookingForStopToken(largestRefreshParseSection.StopToken);

        //Remove all the code elements for the future line to parse.
        for (int i = largestRefreshParseSection.StartLineIndex;
             i < (largestRefreshParseSection.StopLineIndex == documentLines.Count - 1 && largestRefreshParseSection.StopToken == null
                  //If the last index is equals to number of line in document, make sure to also reset the last line, otherwise, reset lines normally.
                  ? largestRefreshParseSection.StopLineIndex + 1
                  : largestRefreshParseSection.StopLineIndex);
             i++)
        {
            if (documentLines[i].CodeElements != null)
            {
                documentLines[i].ResetCodeElements();
            }
        }
    }

    // Reset parsing error diagnostics
    cobolErrorStrategy.Reset(cobolParser);

    // Try to parse code elements :
    // - starting with the current parse section Start token
    // - ending with the current parse section Stop token
    CodeElementsParser.CobolCodeElementsContext codeElementsParseTree = null;
    try
    {
        perfStatsForParserInvocation.OnStartAntlrParsing();
        if (AntlrPerformanceProfiler != null)
        {
            AntlrPerformanceProfiler.BeginParsingSection();
        }
        codeElementsParseTree = cobolParser.cobolCodeElements();
        if (AntlrPerformanceProfiler != null)
        {
            AntlrPerformanceProfiler.EndParsingSection(codeElementsParseTree.ChildCount);
        }
        perfStatsForParserInvocation.OnStopAntlrParsing(
            AntlrPerformanceProfiler != null ? (int)AntlrPerformanceProfiler.CurrentFileInfo.DecisionTimeMs : 0,
            AntlrPerformanceProfiler != null ? AntlrPerformanceProfiler.CurrentFileInfo.RuleInvocations.Sum() : 0);
    }
    catch (Exception e)
    {
        // Unexpected parser failure: surface it as an ImplementationError diagnostic
        // on the line of the token the parser was stuck on.
        // NOTE(review): if no line is found for the current token, the exception is
        // silently dropped — confirm this is intentional.
        var currentToken = (Token)cobolParser.CurrentToken;
        CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(currentToken);
        if (codeElementsLine != null)
        {
            codeElementsLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, currentToken, currentToken.Line, e));
        }
    }

    if (codeElementsParseTree != null)
    {
        // If the parse tree is not empty
        if (codeElementsParseTree.codeElement() != null && codeElementsParseTree.codeElement().Length > 0)
        {
            // Analyze the parse tree for each code element
            foreach (var codeElementParseTree in codeElementsParseTree.codeElement())
            {
                // Get the first line that was parsed
                var tokenStart = (Token)codeElementParseTree.Start;
                CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(tokenStart);
                if (codeElementsLine == null)
                {
                    continue;
                }

                // Register that this line was updated
                // COMMENTED FOR THE SAKE OF PERFORMANCE -- SEE ISSUE #160
                //int updatedLineIndex = documentLines.IndexOf(codeElementsLine, codeElementsLine.LineIndex);
                //codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLineIndex, codeElementsLine));
                codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.LineIndex, codeElementsLine));

                perfStatsForParserInvocation.OnStartTreeBuilding();

                // Visit the parse tree to build a first class object representing the code elements
                try
                {
                    walker.Walk(codeElementBuilder, codeElementParseTree);
                }
                catch (Exception ex)
                {
                    // Builder crash: report it as a diagnostic spanning the source tokens
                    // of the offending line instead of aborting the whole parse.
                    var code = MessageCode.ImplementationError;
                    int line = 0;
                    int start = 0;
                    int stop = 0;
                    if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0)
                    {
                        start = codeElementsLine.SourceTokens[0].StartIndex;
                        stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count - 1].StopIndex;
                    }
                    codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code, ex));
                }
                CodeElement codeElement = codeElementBuilder.CodeElement;
                if (codeElement != null)
                {
                    // Attach consumed tokens and main document line numbers information to the code element
                    if (codeElement.ConsumedTokens.Count == 0)
                    {
                        // ISSUE #204:
                        var tempToken = tokenStream.Lt(1);
                        if (tempToken != null && tempToken != Token.END_OF_FILE)
                        {
                            // if not end of file,
                            // add next token to ConsumedTokens to know where is the CodeElement in error
                            codeElement.ConsumedTokens.Add((Token)tempToken);
                            // this alter CodeElements semantics: in addition to matched tokens,
                            // it includes the first token in error if no token has been matched
                        }
                    }

                    //TODO Issue #384 to discuss if this code should stay here:
                    //This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called
                    //Rule TCLIMITATION_NO_CE_ACROSS_SOURCES
                    if (codeElement.IsAcrossSourceFile())
                    {
                        DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation);
                    }

                    // Add code element to the list
                    codeElementsLine.AddCodeElement(codeElement);
                }
            }
        }
        // If the parse tree contains errors
        if (codeElementsParseTree.Diagnostics != null)
        {
            foreach (ParserDiagnostic d in codeElementsParseTree.Diagnostics)
            {
                if (d.OffendingSymbol != null)
                {
                    CodeElementsLine codeElementsLine = GetCodeElementsLineForToken((Token)d.OffendingSymbol);
                    if (codeElementsLine != null)
                    {
                        codeElementsLine.AddParserDiagnostic(d);
                    }
                }
            }
        }
        perfStatsForParserInvocation.OnStopTreeBuilding();
    }
    if (AntlrPerformanceProfiler != null)
    {
        AntlrPerformanceProfiler.EndParsingFile(cobolParser.ParseInfo.DecisionInfo, (int)(cobolParser.ParseInfo.GetTotalTimeInPrediction() / 1000000));
    }

    return(codeElementsLinesChanges);
}