/// <summary> /// Incremental parsing of a set of processed tokens lines changes /// </summary> internal static IList <DocumentChange <ICodeElementsLine> > ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <CodeElementsLine> documentLines, IList <DocumentChange <IProcessedTokensLine> > processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, PerfStatsForParserInvocation perfStatsForParserInvocation) { // Collect all changes applied to the processed tokens lines during the incremental scan IList <DocumentChange <ICodeElementsLine> > codeElementsLinesChanges = new List <DocumentChange <ICodeElementsLine> >(); // There are 2 reasons to re-parse a tokens line after a change : // 1. The tokens line changed : these lines were already reset during the previous steps // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed --- IList <ParseSection> refreshParseSections = null; ParseSection largestRefreshParseSection = null; // Iterate over all processed tokens changes detected by the PreprocessorStep : // - refresh all the adjacent lines participating in a CodeElement // - register the start and stop token for all sections of the document which need to be parsed again if (processedTokensLinesChanges != null && processedTokensLinesChanges.Count > 0) { // If the document was cleared, everything must be parsed again if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared) { refreshParseSections = new List <ParseSection>(); ParseSection lastParseSection = null; foreach (DocumentChange <IProcessedTokensLine> tokensChange in processedTokensLinesChanges) { if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex) { lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection); refreshParseSections.Add(lastParseSection); } } } } if (refreshParseSections != null) { //After getting all the parts refreshed, get the largest part that has been refreshed var minParseSection = refreshParseSections.OrderBy(p => p.StartLineIndex).First(); var maxParseSection = refreshParseSections.OrderByDescending(p => p.StopLineIndex).First(); largestRefreshParseSection = new ParseSection(minParseSection.StartLineIndex, minParseSection.StartToken, maxParseSection.StopLineIndex, maxParseSection.StopToken, maxParseSection.StopTokenIsFirstTokenOfTheLine); } // --- INITIALIZE ANTLR CodeElements parser --- // Create a token iterator on top of pre-processed tokens lines ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines); // Create an Antlr compatible token source on top of the token iterator TokensLinesTokenSource tokenSource = new TokensLinesTokenSource( textSourceInfo.Name, tokensIterator); // Init parser TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens); CodeElementsParser cobolParser = new CodeElementsParser(tokenStream); // REVERT TO STD PARSER ==> TracingCobolParser cobolParser = new TracingCobolParser(tokenStream); // Optionnaly activate Antlr Parser performance profiling // WARNING : use this in a single-treaded context only (uses static field) if (AntlrPerformanceProfiler == null && perfStatsForParserInvocation.ActivateDetailedAntlrPofiling) { AntlrPerformanceProfiler = new AntlrPerformanceProfiler(cobolParser); } if (AntlrPerformanceProfiler != null) { // Replace the generated parser by a subclass which traces all rules invocations cobolParser = new CodeElementsTracingParser(tokenStream); var tokensCountIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines); AntlrPerformanceProfiler.BeginParsingFile(textSourceInfo, tokensCountIterator); } // Customize error recovery strategy IAntlrErrorStrategy cobolErrorStrategy = new CodeElementErrorStrategy(); cobolParser.ErrorHandler = cobolErrorStrategy; // Register all parse errors in a list in memory ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener(); cobolParser.RemoveErrorListeners(); cobolParser.AddErrorListener(errorListener); // Prepare to analyze the parse tree ParseTreeWalker walker = new ParseTreeWalker(); CodeElementBuilder codeElementBuilder = new CodeElementBuilder(); codeElementBuilder.Dispatcher = new CodeElementDispatcher(); codeElementBuilder.Dispatcher.CreateListeners(); // --- INCREMENTAL PARSING --- // In case of incremental parsing, parse only the code sections we need to refresh if (largestRefreshParseSection != null) { // Seek just before the next code element starting token tokenStream.SeekToToken(largestRefreshParseSection.StartToken); tokenStream.StartLookingForStopToken(largestRefreshParseSection.StopToken); //Remove all the code elements for the future line to parse. for (int i = largestRefreshParseSection.StartLineIndex; i < (largestRefreshParseSection.StopLineIndex == documentLines.Count - 1 && largestRefreshParseSection.StopToken == null //If the last index is equals to number of line in document, make sure to also reset the last line, otherwise, reset lines normally. ? largestRefreshParseSection.StopLineIndex + 1 : largestRefreshParseSection.StopLineIndex); i++) { if (documentLines[i].CodeElements != null) { documentLines[i].ResetCodeElements(); } } } // Reset parsing error diagnostics cobolErrorStrategy.Reset(cobolParser); // Try to parse code elements : // - starting with the current parse section Start token // - ending with the current parse section Stop token CodeElementsParser.CobolCodeElementsContext codeElementsParseTree = null; try { perfStatsForParserInvocation.OnStartAntlrParsing(); if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.BeginParsingSection(); } codeElementsParseTree = cobolParser.cobolCodeElements(); if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.EndParsingSection(codeElementsParseTree.ChildCount); } perfStatsForParserInvocation.OnStopAntlrParsing( AntlrPerformanceProfiler != null ? (int)AntlrPerformanceProfiler.CurrentFileInfo.DecisionTimeMs : 0, AntlrPerformanceProfiler != null ? AntlrPerformanceProfiler.CurrentFileInfo.RuleInvocations.Sum() : 0); } catch (Exception e) { var currentToken = (Token)cobolParser.CurrentToken; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(currentToken); if (codeElementsLine != null) { codeElementsLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, currentToken, currentToken.Line, e)); } } if (codeElementsParseTree != null) { // If the parse tree is not empty if (codeElementsParseTree.codeElement() != null && codeElementsParseTree.codeElement().Length > 0) { // Analyze the parse tree for each code element foreach (var codeElementParseTree in codeElementsParseTree.codeElement()) { // Get the first line that was parsed var tokenStart = (Token)codeElementParseTree.Start; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(tokenStart); if (codeElementsLine == null) { continue; } // Register that this line was updated // COMMENTED FOR THE SAKE OF PERFORMANCE -- SEE ISSUE #160 //int updatedLineIndex = documentLines.IndexOf(codeElementsLine, codeElementsLine.LineIndex); //codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLineIndex, codeElementsLine)); codeElementsLinesChanges.Add( new DocumentChange <ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.LineIndex, codeElementsLine)); perfStatsForParserInvocation.OnStartTreeBuilding(); // Visit the parse tree to build a first class object representing the code elements try { walker.Walk(codeElementBuilder, codeElementParseTree); } catch (Exception ex) { var code = MessageCode.ImplementationError; int line = 0; int start = 0; int stop = 0; if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0) { start = codeElementsLine.SourceTokens[0].StartIndex; stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count - 1].StopIndex; } codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code, ex)); } CodeElement codeElement = codeElementBuilder.CodeElement; if (codeElement != null) { // Attach consumed tokens and main document line numbers information to the code element if (codeElement.ConsumedTokens.Count == 0) { // ISSUE #204: var tempToken = tokenStream.Lt(1); if (tempToken != null && tempToken != Token.END_OF_FILE) { // if not end of file, // add next token to ConsumedTokens to know where is the CodeElement in error codeElement.ConsumedTokens.Add((Token)tempToken); // this alter CodeElements semantics: in addition to matched tokens, // it includes the first token in error if no token has been matched } } //TODO Issue #384 to discuss if this code should stay here: //This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called //Rule TCLIMITATION_NO_CE_ACROSS_SOURCES if (codeElement.IsAcrossSourceFile()) { DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation); } // Add code element to the list codeElementsLine.AddCodeElement(codeElement); } } } // If the parse tree contains errors if (codeElementsParseTree.Diagnostics != null) { foreach (ParserDiagnostic d in codeElementsParseTree.Diagnostics) { if (d.OffendingSymbol != null) { CodeElementsLine codeElementsLine = GetCodeElementsLineForToken((Token)d.OffendingSymbol); if (codeElementsLine != null) { codeElementsLine.AddParserDiagnostic(d); } } } } perfStatsForParserInvocation.OnStopTreeBuilding(); } if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.EndParsingFile(cobolParser.ParseInfo.DecisionInfo, (int)(cobolParser.ParseInfo.GetTotalTimeInPrediction() / 1000000)); } return(codeElementsLinesChanges); }
/// <summary> /// Incremental parsing of a set of processed tokens lines changes /// </summary> internal static IList <DocumentChange <ICodeElementsLine> > ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <CodeElementsLine> documentLines, IList <DocumentChange <IProcessedTokensLine> > processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions) { // Collect all changes applied to the processed tokens lines during the incremental scan IList <DocumentChange <ICodeElementsLine> > codeElementsLinesChanges = new List <DocumentChange <ICodeElementsLine> >(); // There are 2 reasons to re-parse a tokens line after a change : // 1. The tokens line changed : these lines were already reset during the previous steps // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed --- IList <ParseSection> refreshParseSections = null; // Iterate over all processed tokens changes detected by the PreprocessorStep : // - refresh all the adjacent lines participating in a CodeElement // - register the start and stop token for all sections of the document which need to be parsed again if (processedTokensLinesChanges != null) { // If the document was cleared, everything must be parsed again if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared) { refreshParseSections = new List <ParseSection>(); ParseSection lastParseSection = null; foreach (DocumentChange <IProcessedTokensLine> tokensChange in processedTokensLinesChanges) { if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex) { lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection); refreshParseSections.Add(lastParseSection); } } } } // --- INITIALIZE ANTLR CodeElements parser --- // Create a token iterator on top of pre-processed tokens lines ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines); // Create an Antlr compatible token source on top of the token iterator TokensLinesTokenSource tokenSource = new TokensLinesTokenSource( textSourceInfo.Name, tokensIterator); // Init parser TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens); CodeElementsParser cobolParser = new CodeElementsParser(tokenStream); // REVERT TO STD PARSER ==> TracingCobolParser cobolParser = new TracingCobolParser(tokenStream); // Customize error recovery strategy IAntlrErrorStrategy cobolErrorStrategy = new CodeElementErrorStrategy(); cobolParser.ErrorHandler = cobolErrorStrategy; // Register all parse errors in a list in memory ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener(); cobolParser.RemoveErrorListeners(); cobolParser.AddErrorListener(errorListener); // Prepare to analyze the parse tree ParseTreeWalker walker = new ParseTreeWalker(); CodeElementBuilder codeElementBuilder = new CodeElementBuilder(); codeElementBuilder.Dispatcher = new CodeElementDispatcher(); codeElementBuilder.Dispatcher.CreateListeners(); // --- INCREMENTAL PARSING --- // In case of incremental parsing, parse only the code sections we need to refresh IEnumerator <ParseSection> parseSectionsEnumerator = null; ParseSection currentParseSection = null; if (refreshParseSections != null) { // Get the first code section we need to refresh parseSectionsEnumerator = refreshParseSections.GetEnumerator(); parseSectionsEnumerator.MoveNext(); currentParseSection = parseSectionsEnumerator.Current; // Seek just before the next code element starting token tokenStream.SeekToToken(currentParseSection.StartToken); tokenStream.StartLookingForStopToken(currentParseSection.StopToken); } // Parse a list of code elements for each parse section while advancing in the underlying token stream do { // Reset parsing error diagnostics cobolErrorStrategy.Reset(cobolParser); // Try to parse code elements : // - starting with the current parse section Start token // - ending with the current parse section Stop token CodeElementsParser.CobolCodeElementsContext codeElementsParseTree = null; try { codeElementsParseTree = cobolParser.cobolCodeElements(); } catch (Exception e) { var currentToken = (Token)cobolParser.CurrentToken; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(currentToken); codeElementsLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, currentToken, currentToken.Line, e)); } if (codeElementsParseTree != null) { // If the parse tree is not empty if (codeElementsParseTree.codeElement() != null && codeElementsParseTree.codeElement().Length > 0) { // Analyze the parse tree for each code element foreach (var codeElementParseTree in codeElementsParseTree.codeElement()) { // Get the first line that was parsed var tokenStart = (Token)codeElementParseTree.Start; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(tokenStart); // Register that this line was updated // COMMENTED FOR THE SAKE OF PERFORMANCE -- SEE ISSUE #160 //int updatedLineIndex = documentLines.IndexOf(codeElementsLine, codeElementsLine.InitialLineIndex); //codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLineIndex, codeElementsLine)); codeElementsLinesChanges.Add(new DocumentChange <ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.InitialLineIndex, codeElementsLine)); // Visit the parse tree to build a first class object representing the code elements try { walker.Walk(codeElementBuilder, codeElementParseTree); } catch (Exception ex) { var code = MessageCode.ImplementationError; int line = 0; int start = 0; int stop = 0; if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0) { start = codeElementsLine.SourceTokens[0].StartIndex; stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count - 1].StopIndex; } codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code)); } CodeElement codeElement = codeElementBuilder.CodeElement; if (codeElement != null) { // Attach consumed tokens and main document line numbers information to the code element if (codeElement.ConsumedTokens.Count == 0) { // ISSUE #204: if (tokenStream.Lt(1) != null) { // if not end of file, // add next token to ConsumedTokens to know where is the CodeElement in error codeElement.ConsumedTokens.Add((Token)tokenStream.Lt(1)); // this alter CodeElements semantics: in addition to matched tokens, // it includes the first token in error if no token has been matched } } //TODO Issue #384 to discuss if this code should stay here: //This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called //Rule TCLIMITATION_NO_CE_ACROSS_SOURCES if (codeElement.IsAcrossSourceFile()) { DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation); } // Add code element to the list codeElementsLine.AddCodeElement(codeElement); if (codeElement.Diagnostics != null) { foreach (Diagnostic d in codeElement.Diagnostics) { codeElementsLine.AddParserDiagnostic(d); } } } } } // If the parse tree contains errors if (codeElementsParseTree.Diagnostics != null) { foreach (ParserDiagnostic d in codeElementsParseTree.Diagnostics) { if (d.OffendingSymbol != null) { CodeElementsLine codeElementsLine = GetCodeElementsLineForToken((Token)d.OffendingSymbol); codeElementsLine.AddParserDiagnostic(d); } } } } // In case of incremental parsing, directly jump to next parse section in the token stream // Else, simply start parsing the next CodeElement beginning with the next token if (currentParseSection != null) { // Adavance to the next ParseSection if (parseSectionsEnumerator.MoveNext()) { currentParseSection = parseSectionsEnumerator.Current; tokenStream.SeekToToken(currentParseSection.StartToken); tokenStream.StartLookingForStopToken(currentParseSection.StopToken); } // No more section to parse else { break; } } }while (tokenStream.La(1) >= 0); return(codeElementsLinesChanges); }