/// <summary> /// Incremental scan of a set of text lines changes /// </summary> internal static IList <DocumentChange <ITokensLine> > ScanTextLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <TokensLine> documentLines, IList <DocumentChange <ICobolTextLine> > textLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, List <RemarksDirective.TextNameVariation> copyTextNameVariations) { return(ScanTextLinesChanges(textSourceInfo, documentLines, textLinesChanges, prepareDocumentLineForUpdate, compilerOptions, copyTextNameVariations, null)); }
/// <summary> /// Illustration : lines with directives continued from previous line or with a continuation on next line (before update) are marked with a cross /// [ ] /// [ x] /// [x x] /// [x ] /// [ ] /// A DocumentChange intersects with a previously parsed multiline compiler directive if : /// * LineInserted : /// - the next line was a continuation /// * LineUpdated / LineRemoved : /// - the previous line was continued /// - the next line was a continuation /// When navigating to previous or next line searching for a continuation, we must ignore all fresh insertions / updates. /// We must then reset all processed tokens lines involved in a multiline compiler directive. /// </summary> private static int CheckIfAdjacentLinesNeedRefresh(DocumentChangeType changeType, int lineIndex, ISearchableReadOnlyList <ProcessedTokensLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, IList <DocumentChange <IProcessedTokensLine> > processedTokensLinesChanges, int lastLineIndexReset) { // Navigate backwards to the start of the multiline compiler directive if (lineIndex > 0) { int previousLineIndex = lineIndex; IEnumerator <ProcessedTokensLine> reversedEnumerator = documentLines.GetEnumerator(previousLineIndex - 1, -1, true); while (reversedEnumerator.MoveNext() && (--previousLineIndex > lastLineIndexReset)) { // Get the previous line until a non continued line is encountered ProcessedTokensLine previousLine = reversedEnumerator.Current; // A reset line was already treated by the previous call to CheckIfAdjacentLinesNeedRefresh : stop searching if (previousLine.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing) { break; } // Previous line is a continuation : reset this line and continue navigating backwards // Previous line is not a continuation but is continued : reset this line and stop navigating backwards else if (previousLine.HasDirectiveTokenContinuationFromPreviousLine || previousLine.HasDirectiveTokenContinuedOnNextLine) { lineIndex = previousLineIndex; previousLine = (ProcessedTokensLine)prepareDocumentLineForUpdate(previousLineIndex, previousLine, CompilationStep.Preprocessor); processedTokensLinesChanges.Add(new DocumentChange <IProcessedTokensLine>(DocumentChangeType.LineUpdated, previousLineIndex, previousLine)); if (!previousLine.HasDirectiveTokenContinuationFromPreviousLine) { break; } } // Previous line not involved in a multiline compiler directive : stop searching else { break; } } } // Navigate forwards to the end of the multiline compiler directive if (lineIndex < (documentLines.Count - 1)) { int nextLineIndex = lineIndex; IEnumerator <ProcessedTokensLine> enumerator = documentLines.GetEnumerator(nextLineIndex + 1, -1, true); while (enumerator.MoveNext()) { // Get the next line until non continuation line is encountered nextLineIndex++; ProcessedTokensLine nextLine = enumerator.Current; // A reset line will be treated by the next call to CheckIfAdjacentLinesNeedRefresh : stop searching if (nextLine.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing) { break; } // Next line is a continuation and is continued: reset this line and continue navigating forwards // Next line is a continuation but is not continued : reset this line and stop navigating forwards else if (nextLine.HasDirectiveTokenContinuationFromPreviousLine) { nextLine = (ProcessedTokensLine)prepareDocumentLineForUpdate(nextLineIndex, nextLine, CompilationStep.Preprocessor); processedTokensLinesChanges.Add(new DocumentChange <IProcessedTokensLine>(DocumentChangeType.LineUpdated, nextLineIndex, nextLine)); lastLineIndexReset = nextLineIndex; if (!nextLine.HasDirectiveTokenContinuedOnNextLine) { break; } } // Previous line not involved in a multiline compiler directive : stop searching else { break; } } } return(lastLineIndexReset > lineIndex ? lastLineIndexReset : lineIndex); }
/// <summary> /// Incremental preprocessing of a set of tokens lines changes /// </summary> internal static IList <DocumentChange <IProcessedTokensLine> > ProcessTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <ProcessedTokensLine> documentLines, IList <DocumentChange <ITokensLine> > tokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, IProcessedTokensDocumentProvider processedTokensDocumentProvider, List <RemarksDirective.TextNameVariation> copyTextNameVariations, PerfStatsForParserInvocation perfStatsForParserInvocation, List <CopyDirective> missingCopies) { // Collect all changes applied to the processed tokens lines during the incremental scan IList <DocumentChange <IProcessedTokensLine> > processedTokensLinesChanges = new List <DocumentChange <IProcessedTokensLine> >(); // There are 2 reasons to a preprocess a tokens line after a change : // 1. A tokens line changed : these lines were already reset during the previous steps // 2. If a tokens line that changed was involved in the parsing of a multiline compiler directive, the whole group of lines must be parsed again // Then, if a new COPY directive was parsed : the CompilationDocument to include must be prepared // --- PREPARATION PHASE : reset all processed tokens lines which were involved in a multiline compiler directive where at least one line changed --- // Iterate over all tokens changes detected by the ScannerStep : // refresh all the adjacent lines participating in a ContinuationTokensGroup if (tokensLinesChanges != null) { int lastLineIndexReset = -1; foreach (DocumentChange <ITokensLine> tokensChange in tokensLinesChanges) { processedTokensLinesChanges.Add(new DocumentChange <IProcessedTokensLine>(tokensChange.Type, tokensChange.LineIndex, (IProcessedTokensLine)tokensChange.NewLine)); if (tokensChange.LineIndex > lastLineIndexReset) { lastLineIndexReset = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, processedTokensLinesChanges, lastLineIndexReset); } } } // --- COMPILER DIRECTIVES PHASE : Find and parse all compiler directives --- // Init. Prepare a compiler directive parser // Create a token iterator on top of tokens lines TokensLinesIterator tokensIterator = new TokensLinesIterator( textSourceInfo.Name, documentLines, null, Token.CHANNEL_SourceTokens); // Crate an Antlr compatible token source on top a the token iterator TokensLinesTokenSource tokenSource = new TokensLinesTokenSource( textSourceInfo.Name, tokensIterator); // Init a compiler directive parser CommonTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens); CobolCompilerDirectivesParser directivesParser = new CobolCompilerDirectivesParser(tokenStream); // Optionnaly activate Antlr Parser performance profiling // WARNING : use this in a single-treaded context only (uses static field) if (AntlrPerformanceProfiler == null && perfStatsForParserInvocation.ActivateDetailedAntlrPofiling) { AntlrPerformanceProfiler = new AntlrPerformanceProfiler(directivesParser); } if (AntlrPerformanceProfiler != null) { // Replace the generated parser by a subclass which traces all rules invocations directivesParser = new CobolCompilerDirectivesTracingParser(tokenStream); AntlrPerformanceProfiler.BeginParsingFile(textSourceInfo, null); } IAntlrErrorStrategy compilerDirectiveErrorStrategy = new CompilerDirectiveErrorStrategy(); directivesParser.ErrorHandler = compilerDirectiveErrorStrategy; // Register all parse errors in a list in memory ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener(); directivesParser.RemoveErrorListeners(); directivesParser.AddErrorListener(errorListener); // Prepare to analyze the parse tree ParseTreeWalker walker = new ParseTreeWalker(); CompilerDirectiveBuilder directiveBuilder = new CompilerDirectiveBuilder(compilerOptions, copyTextNameVariations); // 1. Iterate over all compiler directive starting tokens found in the lines which were updated foreach (Token compilerDirectiveStartingToken in documentLines .Where(line => line.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing) .SelectMany(line => line.SourceTokens) .Where(token => token.TokenFamily == TokenFamily.CompilerDirectiveStartingKeyword)) { // 2. Reset the compiler directive parser state // Reset tokens iterator position before parsing // -> seek just before the compiler directive starting token tokensIterator.SeekToToken(compilerDirectiveStartingToken); tokensIterator.PreviousToken(); // Special case : for efficiency reasons, in EXEC SQL INCLUDE directives // only the third token INCLUDE is recognized as a compiler directive // starting keyword by the scanner. In this case, we must rewind the // iterator two tokens backwards to start parsing just before the EXEC token. if (compilerDirectiveStartingToken.TokenType == TokenType.EXEC_SQL_INCLUDE) { tokensIterator.PreviousToken(); tokensIterator.PreviousToken(); } // Reset Antlr BufferedTokenStream position tokenStream.SetTokenSource(tokenSource); // Reset parsing error diagnostics compilerDirectiveErrorStrategy.Reset(directivesParser); // 3. Try to parse a compiler directive starting with the current token perfStatsForParserInvocation.OnStartAntlrParsing(); if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.BeginParsingSection(); } CobolCompilerDirectivesParser.CompilerDirectingStatementContext directiveParseTree = directivesParser.compilerDirectingStatement(); if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.EndParsingSection(directiveParseTree.ChildCount); } perfStatsForParserInvocation.OnStopAntlrParsing( AntlrPerformanceProfiler != null ? (int)AntlrPerformanceProfiler.CurrentFileInfo.DecisionTimeMs : 0, AntlrPerformanceProfiler != null ? AntlrPerformanceProfiler.CurrentFileInfo.RuleInvocations.Sum() : 0); perfStatsForParserInvocation.OnStartTreeBuilding(); // 4. Visit the parse tree to build a first class object representing the compiler directive walker.Walk(directiveBuilder, directiveParseTree); CompilerDirective compilerDirective = directiveBuilder.CompilerDirective; bool errorFoundWhileParsingDirective = compilerDirective == null || compilerDirective.Diagnostics != null || directiveParseTree.Diagnostics != null; // 5. Get all tokens consumed while parsing the compiler directive // and partition them line by line Token startToken = (Token)directiveParseTree.Start; Token stopToken = (Token)directiveParseTree.Stop; if (stopToken == null) { stopToken = startToken; } MultilineTokensGroupSelection tokensSelection = tokensIterator.SelectAllTokensBetween(startToken, stopToken); if (compilerDirective != null) { // 6. Replace all matched tokens by : // - a CompilerDirectiveToken on the first line ProcessedTokensLine firstProcessedTokensLine = documentLines[tokensSelection.FirstLineIndex]; if (tokensSelection.SelectedTokensOnSeveralLines.Length == 1) { firstProcessedTokensLine.InsertCompilerDirectiveTokenOnFirstLine( tokensSelection.TokensOnFirstLineBeforeStartToken, compilerDirective, errorFoundWhileParsingDirective, tokensSelection.SelectedTokensOnSeveralLines[0], tokensSelection.TokensOnLastLineAfterStopToken, false); } else { TokensGroup continuedTokensGroup = firstProcessedTokensLine.InsertCompilerDirectiveTokenOnFirstLine( tokensSelection.TokensOnFirstLineBeforeStartToken, compilerDirective, errorFoundWhileParsingDirective, tokensSelection.SelectedTokensOnSeveralLines[0], null, true); // - a ContinuationTokensGroup on the following lines int selectionLineIndex = 1; int lastLineIndex = tokensSelection.FirstLineIndex + tokensSelection.SelectedTokensOnSeveralLines.Length - 1; for (int nextLineIndex = tokensSelection.FirstLineIndex + 1; nextLineIndex <= lastLineIndex; nextLineIndex++, selectionLineIndex++) { IList <Token> compilerDirectiveTokensOnNextLine = tokensSelection.SelectedTokensOnSeveralLines[selectionLineIndex]; if (compilerDirectiveTokensOnNextLine.Count > 0) { ProcessedTokensLine nextProcessedTokensLine = documentLines[nextLineIndex]; if (nextLineIndex != lastLineIndex) { continuedTokensGroup = nextProcessedTokensLine.InsertCompilerDirectiveTokenOnNextLine( continuedTokensGroup, compilerDirectiveTokensOnNextLine, null, true); } else { continuedTokensGroup = nextProcessedTokensLine.InsertCompilerDirectiveTokenOnNextLine( continuedTokensGroup, compilerDirectiveTokensOnNextLine, tokensSelection.TokensOnLastLineAfterStopToken, false); } } } } } // 7. Register compiler directive parse errors if (errorFoundWhileParsingDirective) { ProcessedTokensLine compilerDirectiveLine = documentLines[tokensSelection.FirstLineIndex]; if (compilerDirective != null && compilerDirective.Diagnostics != null) { foreach (Diagnostic directiveDiag in compilerDirective.Diagnostics) { compilerDirectiveLine.AddDiagnostic(directiveDiag); } } else if (directiveParseTree.Diagnostics != null) { foreach (Diagnostic directiveDiag in directiveParseTree.Diagnostics) { if (compilerDirective != null) { compilerDirective.AddDiagnostic(directiveDiag); } compilerDirectiveLine.AddDiagnostic(directiveDiag); } } } } if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.EndParsingFile(directivesParser.ParseInfo.DecisionInfo, (int)(directivesParser.ParseInfo.GetTotalTimeInPrediction() / 1000000)); } // 8. Advance the state off all ProcessedTokensLines : // NeedsCompilerDirectiveParsing => NeedsCopyDirectiveProcessing if it contains a COPY directive IList <ProcessedTokensLine> parsedLinesWithCopyDirectives = null; // NeedsCompilerDirectiveParsing => Ready otherwise foreach (ProcessedTokensLine parsedLine in documentLines .Where(line => line.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing)) { if (parsedLine.ImportedDocuments != null) { if (parsedLinesWithCopyDirectives == null) { parsedLinesWithCopyDirectives = new List <ProcessedTokensLine>(); } parsedLine.PreprocessingState = ProcessedTokensLine.PreprocessorState.NeedsCopyDirectiveProcessing; parsedLinesWithCopyDirectives.Add(parsedLine); } else { parsedLine.PreprocessingState = ProcessedTokensLine.PreprocessorState.Ready; } } perfStatsForParserInvocation.OnStopTreeBuilding(); // --- COPY IMPORT PHASE : Process COPY (REPLACING) directives --- foreach (var lineChange in processedTokensLinesChanges) { missingCopies.Remove(missingCopies.FirstOrDefault(c => c.COPYToken.Line == lineChange.LineIndex + 1)); } // 1. Iterate over all updated lines containing a new COPY directive if (parsedLinesWithCopyDirectives != null) { foreach (ProcessedTokensLine tokensLineWithCopyDirective in parsedLinesWithCopyDirectives) { // Iterate over all COPY directives found on one updated line foreach (CopyDirective copyDirective in tokensLineWithCopyDirective.ImportedDocuments.Keys.Where(c => c.TextName != null || c.COPYToken.TokenType == TokenType.EXEC).ToArray()) { try { PerfStatsForImportedDocument perfStats; // Load (or retrieve in cache) the document referenced by the COPY directive //Issue #315: tokensLineWithCopyDirective.ScanState must be passed because special names paragraph such as "Decimal point is comma" are declared in the enclosing program and can affect the parsing of COPY ProcessedTokensDocument importedDocumentSource = processedTokensDocumentProvider.GetProcessedTokensDocument(copyDirective.LibraryName, copyDirective.TextName, tokensLineWithCopyDirective.ScanStateBeforeCOPYToken[copyDirective.COPYToken], copyTextNameVariations, out perfStats); // Store it on the current line after applying the REPLACING directive ImportedTokensDocument importedDocument = new ImportedTokensDocument(copyDirective, importedDocumentSource, perfStats); tokensLineWithCopyDirective.ImportedDocuments[copyDirective] = importedDocument; } catch (Exception e) { if (missingCopies != null && copyDirective != null && copyDirective.COPYToken != null && !missingCopies.Contains(copyDirective)) //If list already contains the copy directive just ignore { var missingCopieToReplace = missingCopies.FirstOrDefault(c => c.COPYToken.Line == copyDirective.COPYToken.Line); missingCopies.Remove(missingCopieToReplace); missingCopies.Add(copyDirective); } // Text name refenced by COPY directive was not found // => register a preprocessor error on this line Token failedDirectiveToken = tokensLineWithCopyDirective.TokensWithCompilerDirectives .First( token => token.TokenType == TokenType.CopyImportDirective && ((CompilerDirectiveToken)token).CompilerDirective == copyDirective); Diagnostic diag = new Diagnostic( MessageCode.FailedToLoadTextDocumentReferencedByCopyDirective, failedDirectiveToken.Column, failedDirectiveToken.EndColumn, failedDirectiveToken.Line, e.Message, e); tokensLineWithCopyDirective.AddDiagnostic(diag); } } // Advance processing status of the line tokensLineWithCopyDirective.PreprocessingState = ProcessedTokensLine.PreprocessorState.Ready; } } // --- REPLACE PHASE : REPLACE directives are implemented in ReplaceTokensLinesIterator --- /* Algorithm : * * one REPLACE directive can express several replacement operations * one replacement operation can be of several types (distinguished for optimization purposes) * - SimpleTokenReplace : one source token / zero or one replacement token * - PartialWordReplace : one pure partial word / zero or one replacement token * - SimpleToMultipleTokenReplace : one source token / several replacement tokens * - MultipleTokenReplace : one first + several following source tokens / zero to many replacement tokens * * an iterator maintains a current set of replacement operations * * if nextToken is replace directive * the current set of replacement operations is updated * else * nextToken is compared to each replacement operation in turn * if single -> single source token operation : return replacement token * if single -> multiple operation : setup a secondary iterator with the list of replacement tokens * if multiple -> multiple operation * snapshot of the underlying iterator * try to match all of the following source tokens * if failure : restore snapshot and try next operation * if success : setup a secondary iterator * * token comparison sourceToken / replacementCandidate : * 1. Compare Token type * 2. If same token type and for families * AlphanumericLiteral * NumericLiteral * SyntaxLiteral * Symbol * => compare also Token text * * PartialCobolWord replacement : * p535 : The COPY statement with REPLACING phrase can be used to replace parts of words. * By inserting a dummy operand delimited by colons into the program text, * the compiler will replace the dummy operand with the required text. * Example 3 shows how this is used with the dummy operand :TAG:. * The colons serve as separators and make TAG a stand-alone operand. */ return(processedTokensLinesChanges); }
/// <summary> /// Incremental preprocessing of a set of tokens lines changes /// </summary> internal static IList<DocumentChange<IProcessedTokensLine>> ProcessTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<ProcessedTokensLine> documentLines, IList<DocumentChange<ITokensLine>> tokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, IProcessedTokensDocumentProvider processedTokensDocumentProvider) { // Collect all changes applied to the processed tokens lines during the incremental scan IList<DocumentChange<IProcessedTokensLine>> processedTokensLinesChanges = new List<DocumentChange<IProcessedTokensLine>>(); // There are 2 reasons to a preprocess a tokens line after a change : // 1. A tokens line changed : these lines were already reset during the previous steps // 2. If a tokens line that changed was involved in the parsing of a multiline compiler directive, the whole group of lines must be parsed again // Then, if a new COPY directive was parsed : the CompilationDocument to include must be prepared // --- PREPARATION PHASE : reset all processed tokens lines which were involved in a multiline compiler directive where at least one line changed --- // Iterate over all tokens changes detected by the ScannerStep : // refresh all the adjacent lines participating in a ContinuationTokensGroup if (tokensLinesChanges != null) { int lastLineIndexReset = -1; foreach (DocumentChange<ITokensLine> tokensChange in tokensLinesChanges) { processedTokensLinesChanges.Add(new DocumentChange<IProcessedTokensLine>(tokensChange.Type, tokensChange.LineIndex, (IProcessedTokensLine)tokensChange.NewLine)); if (tokensChange.LineIndex > lastLineIndexReset) { lastLineIndexReset = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, processedTokensLinesChanges, lastLineIndexReset); } } } // --- COMPILER DIRECTIVES PHASE : Find and parse all compiler directives --- // Init. Prepare a compiler directive parser // Create a token iterator on top of tokens lines TokensLinesIterator tokensIterator = new TokensLinesIterator( textSourceInfo.Name, documentLines, null, Token.CHANNEL_SourceTokens); // Crate an Antlr compatible token source on top a the token iterator TokensLinesTokenSource tokenSource = new TokensLinesTokenSource( textSourceInfo.Name, tokensIterator); // Init a compiler directive parser CommonTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens); CobolCompilerDirectivesParser directivesParser = new CobolCompilerDirectivesParser(tokenStream); IAntlrErrorStrategy compilerDirectiveErrorStrategy = new CompilerDirectiveErrorStrategy(); directivesParser.ErrorHandler = compilerDirectiveErrorStrategy; // Register all parse errors in a list in memory DiagnosticSyntaxErrorListener errorListener = new DiagnosticSyntaxErrorListener(); directivesParser.RemoveErrorListeners(); directivesParser.AddErrorListener(errorListener); // Prepare to analyze the parse tree ParseTreeWalker walker = new ParseTreeWalker(); CompilerDirectiveBuilder directiveBuilder = new CompilerDirectiveBuilder(); // 1. Iterate over all compiler directive starting tokens found in the lines which were updated foreach (Token compilerDirectiveStartingToken in documentLines .Where(line => line.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing) .SelectMany(line => line.SourceTokens) .Where(token => token.TokenFamily == TokenFamily.CompilerDirectiveStartingKeyword)) { // 2. Reset the compiler directive parser state // Reset tokens iterator position before parsing // -> seek just before the compiler directive starting token tokensIterator.SeekToToken(compilerDirectiveStartingToken); tokensIterator.PreviousToken(); // Special case : for efficiency reasons, in EXEC SQL INCLUDE directives // only the third token INCLUDE is recognized as a compiler directive // starting keyword by the scanner. In this case, we must rewind the // iterator two tokens backwards to start parsing just before the EXEC token. if (compilerDirectiveStartingToken.TokenType == TokenType.EXEC_SQL_INCLUDE) { tokensIterator.PreviousToken(); tokensIterator.PreviousToken(); } // Reset Antlr BufferedTokenStream position tokenStream.SetTokenSource(tokenSource); // Reset parsing error diagnostics compilerDirectiveErrorStrategy.Reset(directivesParser); errorListener.Diagnostics.Clear(); // 3. Try to parse a compiler directive starting with the current token CobolCompilerDirectivesParser.CompilerDirectingStatementContext directiveParseTree = directivesParser.compilerDirectingStatement(); // 4. Visit the parse tree to build a first class object representing the compiler directive walker.Walk(directiveBuilder, directiveParseTree); CompilerDirective compilerDirective = directiveBuilder.CompilerDirective; bool errorFoundWhileParsingDirective = errorListener.Diagnostics.Count > 0 || directiveBuilder.Diagnostics.Count > 0; // 5. Get all tokens consumed while parsing the compiler directive // and partition them line by line Token startToken = (Token)directiveParseTree.Start; Token stopToken = (Token)directiveParseTree.Stop; if (stopToken == null) stopToken = startToken; MultilineTokensGroupSelection tokensSelection = tokensIterator.SelectAllTokensBetween(startToken, stopToken); if (compilerDirective != null) { // 6. Replace all matched tokens by : // - a CompilerDirectiveToken on the first line ProcessedTokensLine firstProcessedTokensLine = documentLines[tokensSelection.FirstLineIndex]; if (tokensSelection.SelectedTokensOnSeveralLines.Length == 1) { firstProcessedTokensLine.InsertCompilerDirectiveTokenOnFirstLine( tokensSelection.TokensOnFirstLineBeforeStartToken, compilerDirective, errorFoundWhileParsingDirective, tokensSelection.SelectedTokensOnSeveralLines[0], tokensSelection.TokensOnLastLineAfterStopToken, false); } else { TokensGroup continuedTokensGroup = firstProcessedTokensLine.InsertCompilerDirectiveTokenOnFirstLine( tokensSelection.TokensOnFirstLineBeforeStartToken, compilerDirective, errorFoundWhileParsingDirective, tokensSelection.SelectedTokensOnSeveralLines[0], null, true); // - a ContinuationTokensGroup on the following lines int selectionLineIndex = 1; int lastLineIndex = tokensSelection.FirstLineIndex + tokensSelection.SelectedTokensOnSeveralLines.Length - 1; for (int nextLineIndex = tokensSelection.FirstLineIndex + 1; nextLineIndex <= lastLineIndex; nextLineIndex++, selectionLineIndex++) { IList<Token> compilerDirectiveTokensOnNextLine = tokensSelection.SelectedTokensOnSeveralLines[selectionLineIndex]; if (compilerDirectiveTokensOnNextLine.Count > 0) { ProcessedTokensLine nextProcessedTokensLine = documentLines[nextLineIndex]; if (nextLineIndex != lastLineIndex) { continuedTokensGroup = nextProcessedTokensLine.InsertCompilerDirectiveTokenOnNextLine( continuedTokensGroup, compilerDirectiveTokensOnNextLine, null, true); } else { continuedTokensGroup = nextProcessedTokensLine.InsertCompilerDirectiveTokenOnNextLine( continuedTokensGroup, compilerDirectiveTokensOnNextLine, tokensSelection.TokensOnLastLineAfterStopToken, false); } } } } } // 7. Register compiler directive parse errors if (errorFoundWhileParsingDirective) { ProcessedTokensLine compilerDirectiveLine = documentLines[tokensSelection.FirstLineIndex]; foreach (ParserDiagnostic parserDiag in errorListener.Diagnostics) { compilerDirectiveLine.AddDiagnostic(parserDiag); } foreach (Diagnostic directiveDiag in directiveBuilder.Diagnostics) { compilerDirectiveLine.AddDiagnostic(directiveDiag); } } } // 8. Advance the state off all ProcessedTokensLines : // NeedsCompilerDirectiveParsing => NeedsCopyDirectiveProcessing if it contains a COPY directive IList<ProcessedTokensLine> parsedLinesWithCopyDirectives = null; // NeedsCompilerDirectiveParsing => Ready otherwise foreach (ProcessedTokensLine parsedLine in documentLines .Where(line => line.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing)) { if (parsedLine.ImportedDocuments != null) { if(parsedLinesWithCopyDirectives == null) { parsedLinesWithCopyDirectives = new List<ProcessedTokensLine>(); } parsedLine.PreprocessingState = ProcessedTokensLine.PreprocessorState.NeedsCopyDirectiveProcessing; parsedLinesWithCopyDirectives.Add(parsedLine); } else { parsedLine.PreprocessingState = ProcessedTokensLine.PreprocessorState.Ready; } } // --- COPY IMPORT PHASE : Process COPY (REPLACING) directives --- // 1. Iterate over all updated lines containing a new COPY directive if (parsedLinesWithCopyDirectives != null) { foreach (ProcessedTokensLine tokensLineWithCopyDirective in parsedLinesWithCopyDirectives) { // Iterate over all COPY directives found on one updated line foreach (CopyDirective copyDirective in tokensLineWithCopyDirective.ImportedDocuments.Keys.ToArray<CopyDirective>()) { try { // Load (or retrieve in cache) the document referenced by the COPY directive ProcessedTokensDocument importedDocumentSource = processedTokensDocumentProvider.GetProcessedTokensDocument(copyDirective.LibraryName, copyDirective.TextName); // Store it on the current line after appying the REPLACING directive ImportedTokensDocument importedDocument = new ImportedTokensDocument(copyDirective, importedDocumentSource); tokensLineWithCopyDirective.ImportedDocuments[copyDirective] = importedDocument; } catch (Exception e) { // Text name refenced by COPY directive was not found // => register a preprocessor error on this line Token failedDirectiveToken = tokensLineWithCopyDirective.TokensWithCompilerDirectives .Where(token => token.TokenType == TokenType.CopyImportDirective && ((CompilerDirectiveToken)token).CompilerDirective == copyDirective) .First(); Diagnostic diag = new Diagnostic( MessageCode.FailedToLoadTextDocumentReferencedByCopyDirective, failedDirectiveToken.Column, failedDirectiveToken.EndColumn, e.Message); tokensLineWithCopyDirective.AddDiagnostic(diag); } } // Advance processing status of the line tokensLineWithCopyDirective.PreprocessingState = ProcessedTokensLine.PreprocessorState.Ready; } } // --- REPLACE PHASE : REPLACE directives are implemented in ReplaceTokensLinesIterator --- /* Algorithm : * * one REPLACE directive can express several replacement operations * one replacement operation can be of several types (distinguished for optimization purposes) * - SimpleTokenReplace : one source token / zero or one replacement token * - PartialWordReplace : one pure partial word / zero or one replacement token * - SimpleToMultipleTokenReplace : one source token / several replacement tokens * - MultipleTokenReplace : one first + several following source tokens / zero to many replacement tokens * * an iterator maintains a current set of replacement operations * * if nextToken is replace directive * the current set of replacement operations is updated * else * nextToken is compared to each replacement operation in turn * if single -> single source token operation : return replacement token * if single -> multiple operation : setup a secondary iterator with the list of replacement tokens * if multiple -> multiple operation * snapshot of the underlying iterator * try to match all of the following source tokens * if failure : restore snapshot and try next operation * if success : setup a secondary iterator * * token comparison sourceToken / replacementCandidate : * 1. Compare Token type * 2. If same token type and for families * AlphanumericLiteral * NumericLiteral * SyntaxLiteral * Symbol * => compare also Token text * * PartialCobolWord replacement : * p535 : The COPY statement with REPLACING phrase can be used to replace parts of words. * By inserting a dummy operand delimited by colons into the program text, * the compiler will replace the dummy operand with the required text. * Example 3 shows how this is used with the dummy operand :TAG:. * The colons serve as separators and make TAG a stand-alone operand. */ return processedTokensLinesChanges; }
/// <summary> /// Illustration : lines with directives continued from previous line or with a continuation on next line (before update) are marked with a cross /// [ ] /// [ x] /// [x x] /// [x ] /// [ ] /// A DocumentChange intersects with a previously parsed multiline compiler directive if : /// * LineInserted : /// - the next line was a continuation /// * LineUpdated / LineRemoved : /// - the previous line was continued /// - the next line was a continuation /// When navigating to previous or next line searching for a continuation, we must ignore all fresh insertions / updates. /// We must then reset all processed tokens lines involved in a multiline compiler directive. /// </summary> private static int CheckIfAdjacentLinesNeedRefresh(DocumentChangeType changeType, int lineIndex, ISearchableReadOnlyList<ProcessedTokensLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, IList<DocumentChange<IProcessedTokensLine>> processedTokensLinesChanges, int lastLineIndexReset) { // Navigate backwards to the start of the multiline compiler directive if (lineIndex > 0) { int previousLineIndex = lineIndex; IEnumerator<ProcessedTokensLine> reversedEnumerator = documentLines.GetEnumerator(previousLineIndex - 1, -1, true); while (reversedEnumerator.MoveNext() && (--previousLineIndex > lastLineIndexReset)) { // Get the previous line until a non continued line is encountered ProcessedTokensLine previousLine = reversedEnumerator.Current; // A reset line was already treated by the previous call to CheckIfAdjacentLinesNeedRefresh : stop searching if (previousLine.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing) { break; } // Previous line is a continuation : reset this line and continue navigating backwards // Previous line is not a continuation but is continued : reset this line and stop navigating backwards else if (previousLine.HasDirectiveTokenContinuationFromPreviousLine || previousLine.HasDirectiveTokenContinuedOnNextLine) { lineIndex = previousLineIndex; previousLine = (ProcessedTokensLine)prepareDocumentLineForUpdate(previousLineIndex, previousLine, CompilationStep.Preprocessor); processedTokensLinesChanges.Add(new DocumentChange<IProcessedTokensLine>(DocumentChangeType.LineUpdated, previousLineIndex, previousLine)); if(!previousLine.HasDirectiveTokenContinuationFromPreviousLine) { break; } } // Previous line not involved in a multiline compiler directive : stop searching else { break; } } } // Navigate forwards to the end of the multiline compiler directive if (lineIndex < (documentLines.Count - 1)) { int nextLineIndex = lineIndex; IEnumerator<ProcessedTokensLine> enumerator = documentLines.GetEnumerator(nextLineIndex + 1, -1, true); while (enumerator.MoveNext()) { // Get the next line until non continuation line is encountered nextLineIndex++; ProcessedTokensLine nextLine = enumerator.Current; // A reset line will be treated by the next call to CheckIfAdjacentLinesNeedRefresh : stop searching if (nextLine.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing) { break; } // Next line is a continuation and is continued: reset this line and continue navigating forwards // Next line is a continuation but is not continued : reset this line and stop navigating forwards else if (nextLine.HasDirectiveTokenContinuationFromPreviousLine) { nextLine = (ProcessedTokensLine)prepareDocumentLineForUpdate(nextLineIndex, nextLine, CompilationStep.Preprocessor); processedTokensLinesChanges.Add(new DocumentChange<IProcessedTokensLine>(DocumentChangeType.LineUpdated, nextLineIndex, nextLine)); lastLineIndexReset = nextLineIndex; if (!nextLine.HasDirectiveTokenContinuedOnNextLine) { break; } } // Previous line not involved in a multiline compiler directive : stop searching else { break; } } } return lastLineIndexReset > lineIndex ? lastLineIndexReset : lineIndex; }
private static void ScanTokensLineWithMultilineScanState(int lineToScanIndex, TokensLine lineToScan, TextSourceInfo textSourceInfo, ISearchableReadOnlyList<TokensLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, IList<DocumentChange<ITokensLine>> tokensLinesChanges, out int nextLineToScanIndex, out TokensLine nextLineToScan) { // Scan the current line (or continuation lines group) MultilineScanState scanState = ScanTokensLineWithContinuations(lineToScanIndex, lineToScan, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, tokensLinesChanges, out nextLineToScanIndex, out nextLineToScan); // Scan the following lines until we find that the scan state at the beginning of the next line has been updated while(nextLineToScan != null && nextLineToScan.InitialScanState != null && !nextLineToScan.InitialScanState.Equals(scanState)) { scanState = ScanTokensLineWithContinuations(nextLineToScanIndex, nextLineToScan, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, tokensLinesChanges, out nextLineToScanIndex, out nextLineToScan); } }
private static MultilineScanState ScanTokensLineWithContinuations(int lineToScanIndex, TokensLine lineToScan, TextSourceInfo textSourceInfo, ISearchableReadOnlyList<TokensLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, IList<DocumentChange<ITokensLine>> tokensLinesChanges, out int nextLineToScanIndex, out TokensLine nextLineToScan) { // Initialize out parameters if (lineToScanIndex < (documentLines.Count - 1)) { nextLineToScanIndex = lineToScanIndex + 1; nextLineToScan = documentLines[nextLineToScanIndex]; } else { nextLineToScanIndex = -1; nextLineToScan = null; } // Check if the line to scan participates in a multiline continuation // - because it is itself a continuation line bool partOfMultilineContinuation = lineToScan.Type == CobolTextLineType.Continuation; // - or because the following line is a continuation line if (!partOfMultilineContinuation && lineToScanIndex < (documentLines.Count - 1)) { nextLineToScanIndex = lineToScanIndex + 1; nextLineToScan = documentLines[nextLineToScanIndex]; partOfMultilineContinuation = nextLineToScan.Type == CobolTextLineType.Continuation; } // ^-- LIMITATION : we don't support the case where one or more comment lines follow the current line before a continuation line // Case 1 : the line is not part of a multiline continuation => we can parse it as as a standalone line if (!partOfMultilineContinuation) { // Create a new copy of the line before the update if necessary lineToScan = (TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner); if (lineToScanIndex == 0) { // Scan the first line of the document Scanner.ScanFirstLine(lineToScan, false, false, false, textSourceInfo.EncodingForAlphanumericLiterals, compilerOptions); } else { // Get the scan state at the end of the previous line TokensLine previousLine = documentLines[lineToScanIndex - 1]; // Scan the current line with this initial scan state Scanner.ScanTokensLine(lineToScan, previousLine.ScanState, compilerOptions); } tokensLinesChanges.Add(new DocumentChange<ITokensLine>(DocumentChangeType.LineUpdated, lineToScanIndex, lineToScan)); return lineToScan.ScanState; } // Case 2 : the line is part of a multiline continuation => we must parse all continuation lines as a group else { // Build a list of the lines we will have to scan as a group : IList<TokensLine> continuationLinesGroup = new List<TokensLine>(); int firstLineIndex = lineToScanIndex; continuationLinesGroup.Insert(0, (TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); // Navigate backwards to the start of the multiline continuation if (lineToScan.Type == CobolTextLineType.Continuation && lineToScanIndex > 0) { int revLineToScanIndex = lineToScanIndex; IEnumerator<TokensLine> reversedEnumerator = documentLines.GetEnumerator(lineToScanIndex - 1, -1, true); while(reversedEnumerator.MoveNext()) { // Get the previous line until a non continuation and non comment line is encountered revLineToScanIndex--; lineToScan = reversedEnumerator.Current; if(lineToScan.Type != CobolTextLineType.Continuation /*&& <-- LIMITATION : this compiler does not support comment or blank lines between two continuation line lineToScan.Type != CobolTextLineType.Comment && lineToScan.Type != CobolTextLineType.Blank*/) // see p54 : for continuation, blank lines are treated like comment lines { firstLineIndex = revLineToScanIndex; continuationLinesGroup.Insert(0, (TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); } else { break; } } } // Reuse our knowledge of the next line if it is available and if it is a continuation if (nextLineToScan != null && nextLineToScan.Type == CobolTextLineType.Continuation) { lineToScanIndex++; lineToScan = nextLineToScan; continuationLinesGroup.Add((TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); nextLineToScanIndex = -1; nextLineToScan = null; } // Navigate forwards to the end of the multiline continuation if (lineToScanIndex < (documentLines.Count - 1)) { IEnumerator<TokensLine> enumerator = documentLines.GetEnumerator(lineToScanIndex + 1, -1, false); while (enumerator.MoveNext()) { // Get the next line until a non continuation and non comment line is encountered lineToScanIndex++; lineToScan = enumerator.Current; if (lineToScan.Type == CobolTextLineType.Continuation /*|| <-- LIMITATION : this compiler does not support comment or blank lines between two continuation line lineToScan.Type == CobolTextLineType.Comment || lineToScan.Type == CobolTextLineType.Blank*/) // see p54 : for continuation, blank lines are treated like comment lines { // Add this line at the end of the list of continuation lines continuationLinesGroup.Add((TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); } else { // Save the knowledge of the next line and exit the loop nextLineToScanIndex = lineToScanIndex; nextLineToScan = lineToScan; break; } } } // Scan the group of continuation lines if (firstLineIndex == 0) { // Scan the first line group of the document Scanner.ScanFirstLineContinuationGroup(continuationLinesGroup, false, false, false, textSourceInfo.EncodingForAlphanumericLiterals, compilerOptions); } else { // Get the scan state at the end of the previous line TokensLine previousLine = documentLines[firstLineIndex - 1]; // Scan the current line group with this initial scan state Scanner.ScanTokensLineContinuationGroup(continuationLinesGroup, previousLine.ScanState, compilerOptions); } int updatedLineIndex = firstLineIndex; foreach(TokensLine updatedLine in continuationLinesGroup) { tokensLinesChanges.Add(new DocumentChange<ITokensLine>(DocumentChangeType.LineUpdated, updatedLineIndex, updatedLine)); updatedLineIndex++; } return continuationLinesGroup[continuationLinesGroup.Count - 1].ScanState; } }
/// <summary> /// Incremental parsing of a set of processed tokens lines changes /// </summary> internal static IList<DocumentChange<ICodeElementsLine>> ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<CodeElementsLine> documentLines, IList<DocumentChange<IProcessedTokensLine>> processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions) { // Collect all changes applied to the processed tokens lines during the incremental scan IList<DocumentChange<ICodeElementsLine>> codeElementsLinesChanges = new List<DocumentChange<ICodeElementsLine>>(); // There are 2 reasons to re-parse a tokens line after a change : // 1. The tokens line changed : these lines were already reset during the previous steps // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed --- IList<ParseSection> refreshParseSections = null; // Iterate over all processed tokens changes detected by the PreprocessorStep : // - refresh all the adjacent lines participating in a CodeElement // - register the start and stop token for all sections of the document which need to be parsed again if (processedTokensLinesChanges != null) { // If the document was cleared, everything must be parsed again if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared) { refreshParseSections = new List<ParseSection>(); ParseSection lastParseSection = null; foreach (DocumentChange<IProcessedTokensLine> tokensChange in processedTokensLinesChanges) { if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex) { lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection); refreshParseSections.Add(lastParseSection); } } } } // --- INITIALIZE ANTLR CodeElements parser --- // Create a token iterator on top of pre-processed tokens lines ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines); // Create an Antlr compatible token source on top of the token iterator TokensLinesTokenSource tokenSource = new TokensLinesTokenSource( textSourceInfo.Name, tokensIterator); // Init parser TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens); TracingCobolParser cobolParser = new TracingCobolParser(tokenStream); // -> activate full ambiguities detection //parser.Interpreter.PredictionMode = PredictionMode.LlExactAmbigDetection; IAntlrErrorStrategy cobolErrorStrategy = new CobolErrorStrategy(); cobolParser.ErrorHandler = cobolErrorStrategy; // Register all parse errors in a list in memory DiagnosticSyntaxErrorListener errorListener = new DiagnosticSyntaxErrorListener(); cobolParser.RemoveErrorListeners(); cobolParser.AddErrorListener(errorListener); // Prepare to analyze the parse tree ParseTreeWalker walker = new ParseTreeWalker(); CodeElementBuilder codeElementBuilder = new CodeElementBuilder(); codeElementBuilder.Dispatcher = new CodeElementDispatcher(); codeElementBuilder.Dispatcher.CreateListeners(); // --- INCREMENTAL PARSING --- // In case of incremental parsing, parse only the code sections we need to refresh IEnumerator<ParseSection> parseSectionsEnumerator = null; ParseSection currentParseSection = null; if (refreshParseSections != null) { // Get the first code section we need to refresh parseSectionsEnumerator = refreshParseSections.GetEnumerator(); parseSectionsEnumerator.MoveNext(); currentParseSection = parseSectionsEnumerator.Current; // Seek just before the next code element starting token tokenStream.SeekToToken(currentParseSection.StartToken); tokenStream.StartLookingForStopToken(currentParseSection.StopToken); } // Parse one CodeElement at a time while advancing in the underlying token stream do { // Reset parsing error diagnostics //cobolErrorStrategy.Reset(cobolParser); errorListener.Diagnostics.Clear(); // Reset parser traces (consumed tokens) cobolParser.ResetTraces(); // Try to parse a code element starting with the current token CodeElementsParser.CodeElementContext codeElementParseTree = cobolParser.codeElement(); // If the parse tree is not empty if (codeElementParseTree.Start.Type > 0) { // Get the first line that was parsed var tokenStart = (Token)codeElementParseTree.Start; CodeElementsLine codeElementsLine; var importedToken = tokenStart as ImportedToken; if (importedToken != null) { codeElementsLine = (CodeElementsLine) importedToken.CopyDirective.TextNameSymbol.TokensLine; } else { codeElementsLine = ((CodeElementsLine) tokenStart.TokensLine); } // Register that this line was updated // COMMENTED FOR THE SAKE OF PERFORMANCE -- SEE ISSUE #160 //int updatedLineIndex = documentLines.IndexOf(codeElementsLine, codeElementsLine.InitialLineIndex); //codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLineIndex, codeElementsLine)); codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.InitialLineIndex, codeElementsLine)); // Visit the parse tree to build a first class object representing the code elements try { walker.Walk(codeElementBuilder, codeElementParseTree); } catch (Exception ex) { var code = Diagnostics.MessageCode.ImplementationError; int line = 0; int start = 0; int stop = 0; if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0){ start = codeElementsLine.SourceTokens[0].StartIndex; stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count-1].StopIndex; } codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start,stop,line, null, code)); } CodeElement codeElement = codeElementBuilder.CodeElement; if (codeElement != null) { // Attach consumed tokens and main document line numbers information to the code element codeElement.ConsumedTokens = cobolParser.ConsumedTokens; if (codeElement.ConsumedTokens == null) {// ISSUE #204: if (tokenStream.Lt(1) != null) {// if not end of file, // add next token to ConsumedTokens to know where is the CodeElement in error codeElement.ConsumedTokens.Add((Token)tokenStream.Lt(1)); // this alter CodeElements semantics: in addition to matched tokens, // it includes the first token in error if no token has been matched } } //TODO Issue #384 to discuss if this code should stay here: //This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called //Rule TCLIMITATION_NO_CE_ACROSS_SOURCES if (codeElement.IsAcrossSourceFile()) { DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation); } // Add code element to the list codeElementsLine.AddCodeElement(codeElement); foreach (ParserDiagnostic d in errorListener.Diagnostics) { codeElement.Diagnostics.Add(d); } foreach (Diagnostic d in codeElement.Diagnostics) { codeElementsLine.AddParserDiagnostic(d); } } else { foreach (ParserDiagnostic d in errorListener.Diagnostics) { codeElementsLine.AddParserDiagnostic(d); } } } // In case of incremental parsing, directly jump to next parse section in the token stream // Else, simply start parsing the next CodeElement beginning with the next token if (currentParseSection != null) { // Check if we reached the end of the current ParseSection if (tokenStream.StreamReachedStopToken) { // Adavance to the next ParseSection if (parseSectionsEnumerator.MoveNext()) { currentParseSection = parseSectionsEnumerator.Current; tokenStream.SeekToToken(currentParseSection.StartToken); tokenStream.StartLookingForStopToken(currentParseSection.StopToken); } // No more section to parse else { break; } } } } while (tokenStream.La(1) >= 0); return codeElementsLinesChanges; }
/// <summary> /// Incremental scan of a set of text lines changes /// </summary> internal static IList<DocumentChange<ITokensLine>> ScanTextLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<TokensLine> documentLines, IList<DocumentChange<ICobolTextLine>> textLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions) { return ScanTextLinesChanges(textSourceInfo, documentLines, textLinesChanges, prepareDocumentLineForUpdate, compilerOptions, null); }
private static MultilineScanState ScanTokensLineWithContinuations(int lineToScanIndex, TokensLine lineToScan, TextSourceInfo textSourceInfo, ISearchableReadOnlyList <TokensLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, List <RemarksDirective.TextNameVariation> copyTextNameVariations, IList <DocumentChange <ITokensLine> > tokensLinesChanges, [CanBeNull] MultilineScanState scanState, out int nextLineToScanIndex, out TokensLine nextLineToScan) { // Initialize out parameters if (lineToScanIndex < (documentLines.Count - 1)) { nextLineToScanIndex = lineToScanIndex + 1; nextLineToScan = documentLines[nextLineToScanIndex]; } else { nextLineToScanIndex = -1; nextLineToScan = null; } // Check if the line to scan participates in a multiline continuation // - because it is itself a continuation line bool partOfMultilineContinuation = lineToScan.Type == CobolTextLineType.Continuation; // - or because the following line is a continuation line if (!partOfMultilineContinuation && lineToScanIndex < (documentLines.Count - 1)) { nextLineToScanIndex = lineToScanIndex + 1; nextLineToScan = documentLines[nextLineToScanIndex]; partOfMultilineContinuation = nextLineToScan.Type == CobolTextLineType.Continuation; } // ^-- LIMITATION : we don't support the case where one or more comment lines follow the current line before a continuation line // Case 1 : the line is not part of a multiline continuation => we can parse it as as a standalone line if (!partOfMultilineContinuation) { // Create a new copy of the line before the update if necessary lineToScan = (TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner); if (lineToScanIndex == 0) { if (scanState != null) { // Scan the first line of the document Scanner.ScanFirstLine(lineToScan, scanState.InsideDataDivision, scanState.DecimalPointIsComma, scanState.WithDebuggingMode, textSourceInfo.EncodingForAlphanumericLiterals, compilerOptions, copyTextNameVariations); } else { Scanner.ScanFirstLine(lineToScan, false, false, false, textSourceInfo.EncodingForAlphanumericLiterals, compilerOptions, copyTextNameVariations); } } else { // Get the scan state at the end of the previous line TokensLine previousLine = documentLines[lineToScanIndex - 1]; // Scan the current line with this initial scan state Scanner.ScanTokensLine(lineToScan, previousLine.ScanState, compilerOptions, copyTextNameVariations); } tokensLinesChanges.Add(new DocumentChange <ITokensLine>(DocumentChangeType.LineUpdated, lineToScanIndex, lineToScan)); return(lineToScan.ScanState); } // Case 2 : the line is part of a multiline continuation => we must parse all continuation lines as a group else { // Build a list of the lines we will have to scan as a group : IList <TokensLine> continuationLinesGroup = new List <TokensLine>(); int firstLineIndex = lineToScanIndex; continuationLinesGroup.Insert(0, (TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); // Navigate backwards to the start of the multiline continuation if (lineToScan.Type == CobolTextLineType.Continuation && lineToScanIndex > 0) { int revLineToScanIndex = lineToScanIndex; IEnumerator <TokensLine> reversedEnumerator = documentLines.GetEnumerator(lineToScanIndex - 1, -1, true); while (reversedEnumerator.MoveNext()) { // Get the previous line until a non continuation and non comment line is encountered revLineToScanIndex--; lineToScan = reversedEnumerator.Current; if (lineToScan.Type != CobolTextLineType.Continuation /*&& <-- LIMITATION : this compiler does not support comment or blank lines between two continuation line * lineToScan.Type != CobolTextLineType.Comment && lineToScan.Type != CobolTextLineType.Blank*/) // see p54 : for continuation, blank lines are treated like comment lines { firstLineIndex = revLineToScanIndex; continuationLinesGroup.Insert(0, (TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); } else { break; } } } // Reuse our knowledge of the next line if it is available and if it is a continuation if (nextLineToScan != null && nextLineToScan.Type == CobolTextLineType.Continuation) { lineToScanIndex++; lineToScan = nextLineToScan; continuationLinesGroup.Add((TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); nextLineToScanIndex = -1; nextLineToScan = null; } // Navigate forwards to the end of the multiline continuation if (lineToScanIndex < (documentLines.Count - 1)) { IEnumerator <TokensLine> enumerator = documentLines.GetEnumerator(lineToScanIndex + 1, -1, false); while (enumerator.MoveNext()) { // Get the next line until a non continuation and non comment line is encountered lineToScanIndex++; lineToScan = enumerator.Current; if (lineToScan.Type == CobolTextLineType.Continuation /*|| <-- LIMITATION : this compiler does not support comment or blank lines between two continuation line * lineToScan.Type == CobolTextLineType.Comment || lineToScan.Type == CobolTextLineType.Blank*/) // see p54 : for continuation, blank lines are treated like comment lines { // Add this line at the end of the list of continuation lines continuationLinesGroup.Add((TokensLine)prepareDocumentLineForUpdate(lineToScanIndex, lineToScan, CompilationStep.Scanner)); } else { // Save the knowledge of the next line and exit the loop nextLineToScanIndex = lineToScanIndex; nextLineToScan = lineToScan; break; } } } // Scan the group of continuation lines if (firstLineIndex == 0) { // Scan the first line group of the document Scanner.ScanFirstLineContinuationGroup(continuationLinesGroup, false, false, false, textSourceInfo.EncodingForAlphanumericLiterals, compilerOptions, copyTextNameVariations); } else { // Get the scan state at the end of the previous line TokensLine previousLine = documentLines[firstLineIndex - 1]; // Scan the current line group with this initial scan state Scanner.ScanTokensLineContinuationGroup(continuationLinesGroup, previousLine.ScanState, compilerOptions, copyTextNameVariations); } int updatedLineIndex = firstLineIndex; foreach (TokensLine updatedLine in continuationLinesGroup) { tokensLinesChanges.Add(new DocumentChange <ITokensLine>(DocumentChangeType.LineUpdated, updatedLineIndex, updatedLine)); updatedLineIndex++; } return(continuationLinesGroup[continuationLinesGroup.Count - 1].ScanState); } }
private static void ScanTokensLineWithMultilineScanState(int lineToScanIndex, TokensLine lineToScan, TextSourceInfo textSourceInfo, ISearchableReadOnlyList <TokensLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, List <RemarksDirective.TextNameVariation> copyTextNameVariations, IList <DocumentChange <ITokensLine> > tokensLinesChanges, MultilineScanState initialScanState, out int nextLineToScanIndex, out TokensLine nextLineToScan) { // Scan the current line (or continuation lines group) MultilineScanState scanState = ScanTokensLineWithContinuations(lineToScanIndex, lineToScan, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, copyTextNameVariations, tokensLinesChanges, initialScanState, out nextLineToScanIndex, out nextLineToScan); // Scan the following lines until we find that the scan state at the beginning of the next line has been updated while (nextLineToScan != null && nextLineToScan.InitialScanState != null && !nextLineToScan.InitialScanState.Equals(scanState)) { scanState = ScanTokensLineWithContinuations(nextLineToScanIndex, nextLineToScan, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, copyTextNameVariations, tokensLinesChanges, scanState, out nextLineToScanIndex, out nextLineToScan); } }
internal static IList <DocumentChange <ITokensLine> > ScanTextLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <TokensLine> documentLines, IList <DocumentChange <ICobolTextLine> > textLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, List <RemarksDirective.TextNameVariation> copyTextNameVariations, [CanBeNull] MultilineScanState scanState) { // Collect all changes applied to the tokens lines during the incremental scan IList <DocumentChange <ITokensLine> > tokensLinesChanges = new List <DocumentChange <ITokensLine> >(); // There are 3 reasons to scan a line after a text change : // 1. New text lines which were just inserted or updated must be scanned for the first time // 2. Text lines must be scanned again if their initial scan state changed : a new scan of the previous line can alter the scan state at the beginning of the following line // 3. Continuation lines and multiline tokens : if a line participates in a continuation on several lines, scan the group of lines as a whole // IMPORTANT : the text changes are ordered in increasing order of line index for (int textChangeIndex = 0; textChangeIndex < textLinesChanges.Count; textChangeIndex++) { // Local variables used to optimize navigation in the document int nextLineToScanIndex = -1; TokensLine nextLineToScan = null; // Update tokens depending on the current text change DocumentChange <ICobolTextLine> textChange = textLinesChanges[textChangeIndex]; if (textChange.Type == DocumentChangeType.DocumentCleared) { tokensLinesChanges.Add(new DocumentChange <ITokensLine>(DocumentChangeType.DocumentCleared, 0, null)); continue; } else if (textChange.Type == DocumentChangeType.LineInserted || textChange.Type == DocumentChangeType.LineUpdated) { // We update lines as a group below, but we remember here which lines were inserted if (textChange.Type == DocumentChangeType.LineInserted) { tokensLinesChanges.Add(new DocumentChange <ITokensLine>(DocumentChangeType.LineInserted, textChange.LineIndex, (ITokensLine)textChange.NewLine)); } // Text lines which were inserted or updated must be scanned again ScanTokensLineWithMultilineScanState(textChange.LineIndex, (TokensLine)textChange.NewLine, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, copyTextNameVariations, tokensLinesChanges, scanState, out nextLineToScanIndex, out nextLineToScan); } else if (textChange.Type == DocumentChangeType.LineRemoved) { tokensLinesChanges.Add(new DocumentChange <ITokensLine>(DocumentChangeType.LineRemoved, textChange.LineIndex, (ITokensLine)textChange.NewLine)); // Get the last line just before the line that was removed TokensLine previousLine = null; if (textChange.LineIndex > 0) { previousLine = documentLines[textChange.LineIndex - 1]; } // When a text line is removed : // - the previous line must be scanned again if the line which was removed was a member of a multiline continuation group if (previousLine != null && previousLine.HasTokenContinuedOnNextLine) { ScanTokensLineWithMultilineScanState(textChange.LineIndex - 1, previousLine, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, copyTextNameVariations, tokensLinesChanges, scanState, out nextLineToScanIndex, out nextLineToScan); } if (nextLineToScan == null && textChange.LineIndex < documentLines.Count) { nextLineToScanIndex = textChange.LineIndex; nextLineToScan = documentLines[nextLineToScanIndex]; } // - the next line must be scanned again if the scan state at the end of the previous line is different from the scan state at the beginning of the next line if (nextLineToScan != null && nextLineToScanIndex == textChange.LineIndex && previousLine != null && nextLineToScan.InitialScanState.Equals(previousLine.ScanState)) { ScanTokensLineWithMultilineScanState(textChange.LineIndex, nextLineToScan, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, copyTextNameVariations, tokensLinesChanges, scanState, out nextLineToScanIndex, out nextLineToScan); } } // We can skip all text changes with an index smaller than the index of the last line which was already scanned if (nextLineToScan == null) { break; } else if (textChangeIndex < (textLinesChanges.Count - 1)) { int nextTextChangeIndex = textChangeIndex; DocumentChange <ICobolTextLine> nextTextChange = null; do { nextTextChangeIndex++; nextTextChange = textLinesChanges[nextTextChangeIndex]; }while (nextTextChangeIndex < (textLinesChanges.Count - 1) && nextTextChange.LineIndex <= nextLineToScanIndex); textChangeIndex = nextTextChangeIndex - 1; } } return(tokensLinesChanges); }
/// <summary> /// Incremental parsing of a set of processed tokens lines changes /// </summary> internal static IList <DocumentChange <ICodeElementsLine> > ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <CodeElementsLine> documentLines, IList <DocumentChange <IProcessedTokensLine> > processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions) { // Collect all changes applied to the processed tokens lines during the incremental scan IList <DocumentChange <ICodeElementsLine> > codeElementsLinesChanges = new List <DocumentChange <ICodeElementsLine> >(); // There are 2 reasons to re-parse a tokens line after a change : // 1. The tokens line changed : these lines were already reset during the previous steps // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed --- IList <ParseSection> refreshParseSections = null; // Iterate over all processed tokens changes detected by the PreprocessorStep : // - refresh all the adjacent lines participating in a CodeElement // - register the start and stop token for all sections of the document which need to be parsed again if (processedTokensLinesChanges != null) { // If the document was cleared, everything must be parsed again if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared) { refreshParseSections = new List <ParseSection>(); ParseSection lastParseSection = null; foreach (DocumentChange <IProcessedTokensLine> tokensChange in processedTokensLinesChanges) { if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex) { lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection); refreshParseSections.Add(lastParseSection); } } } } // --- INITIALIZE ANTLR CodeElements parser --- // Create a token iterator on top of pre-processed tokens lines ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines); // Create an Antlr compatible token source on top of the token iterator TokensLinesTokenSource tokenSource = new TokensLinesTokenSource( textSourceInfo.Name, tokensIterator); // Init parser TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens); CodeElementsParser cobolParser = new CodeElementsParser(tokenStream); // REVERT TO STD PARSER ==> TracingCobolParser cobolParser = new TracingCobolParser(tokenStream); // Customize error recovery strategy IAntlrErrorStrategy cobolErrorStrategy = new CodeElementErrorStrategy(); cobolParser.ErrorHandler = cobolErrorStrategy; // Register all parse errors in a list in memory ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener(); cobolParser.RemoveErrorListeners(); cobolParser.AddErrorListener(errorListener); // Prepare to analyze the parse tree ParseTreeWalker walker = new ParseTreeWalker(); CodeElementBuilder codeElementBuilder = new CodeElementBuilder(); codeElementBuilder.Dispatcher = new CodeElementDispatcher(); codeElementBuilder.Dispatcher.CreateListeners(); // --- INCREMENTAL PARSING --- // In case of incremental parsing, parse only the code sections we need to refresh IEnumerator <ParseSection> parseSectionsEnumerator = null; ParseSection currentParseSection = null; if (refreshParseSections != null) { // Get the first code section we need to refresh parseSectionsEnumerator = refreshParseSections.GetEnumerator(); parseSectionsEnumerator.MoveNext(); currentParseSection = parseSectionsEnumerator.Current; // Seek just before the next code element starting token tokenStream.SeekToToken(currentParseSection.StartToken); tokenStream.StartLookingForStopToken(currentParseSection.StopToken); } // Parse a list of code elements for each parse section while advancing in the underlying token stream do { // Reset parsing error diagnostics cobolErrorStrategy.Reset(cobolParser); // Try to parse code elements : // - starting with the current parse section Start token // - ending with the current parse section Stop token CodeElementsParser.CobolCodeElementsContext codeElementsParseTree = null; try { codeElementsParseTree = cobolParser.cobolCodeElements(); } catch (Exception e) { var currentToken = (Token)cobolParser.CurrentToken; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(currentToken); codeElementsLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, currentToken, currentToken.Line, e)); } if (codeElementsParseTree != null) { // If the parse tree is not empty if (codeElementsParseTree.codeElement() != null && codeElementsParseTree.codeElement().Length > 0) { // Analyze the parse tree for each code element foreach (var codeElementParseTree in codeElementsParseTree.codeElement()) { // Get the first line that was parsed var tokenStart = (Token)codeElementParseTree.Start; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(tokenStart); // Register that this line was updated // COMMENTED FOR THE SAKE OF PERFORMANCE -- SEE ISSUE #160 //int updatedLineIndex = documentLines.IndexOf(codeElementsLine, codeElementsLine.InitialLineIndex); //codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLineIndex, codeElementsLine)); codeElementsLinesChanges.Add(new DocumentChange <ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.InitialLineIndex, codeElementsLine)); // Visit the parse tree to build a first class object representing the code elements try { walker.Walk(codeElementBuilder, codeElementParseTree); } catch (Exception ex) { var code = MessageCode.ImplementationError; int line = 0; int start = 0; int stop = 0; if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0) { start = codeElementsLine.SourceTokens[0].StartIndex; stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count - 1].StopIndex; } codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code)); } CodeElement codeElement = codeElementBuilder.CodeElement; if (codeElement != null) { // Attach consumed tokens and main document line numbers information to the code element if (codeElement.ConsumedTokens.Count == 0) { // ISSUE #204: if (tokenStream.Lt(1) != null) { // if not end of file, // add next token to ConsumedTokens to know where is the CodeElement in error codeElement.ConsumedTokens.Add((Token)tokenStream.Lt(1)); // this alter CodeElements semantics: in addition to matched tokens, // it includes the first token in error if no token has been matched } } //TODO Issue #384 to discuss if this code should stay here: //This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called //Rule TCLIMITATION_NO_CE_ACROSS_SOURCES if (codeElement.IsAcrossSourceFile()) { DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation); } // Add code element to the list codeElementsLine.AddCodeElement(codeElement); if (codeElement.Diagnostics != null) { foreach (Diagnostic d in codeElement.Diagnostics) { codeElementsLine.AddParserDiagnostic(d); } } } } } // If the parse tree contains errors if (codeElementsParseTree.Diagnostics != null) { foreach (ParserDiagnostic d in codeElementsParseTree.Diagnostics) { if (d.OffendingSymbol != null) { CodeElementsLine codeElementsLine = GetCodeElementsLineForToken((Token)d.OffendingSymbol); codeElementsLine.AddParserDiagnostic(d); } } } } // In case of incremental parsing, directly jump to next parse section in the token stream // Else, simply start parsing the next CodeElement beginning with the next token if (currentParseSection != null) { // Adavance to the next ParseSection if (parseSectionsEnumerator.MoveNext()) { currentParseSection = parseSectionsEnumerator.Current; tokenStream.SeekToToken(currentParseSection.StartToken); tokenStream.StartLookingForStopToken(currentParseSection.StopToken); } // No more section to parse else { break; } } }while (tokenStream.La(1) >= 0); return(codeElementsLinesChanges); }
/// <summary> /// Illustration : lines with code elements continued from previous line or with a continuation on next line (before update) /// SW represents a code element starting word /// [ SW x] /// [ x] /// [SW x] /// [ SW ] /// [ x SW] /// A DocumentChange intersects with a previously parsed multiline code element if : /// * LineInserted : /// - all previous lines until the last starting word /// (get previous lines until the previous code element is found) /// - all the next lines until the next starting word /// (get next lines until the next code element is found, /// do not reset the last line if the next code element starts at the beginning of the line) /// * LineUpdated / LineRemoved : /// - all previous lines until the last starting word /// (get previous lines until the previous code element is found) /// - all the next lines until the next starting word /// (get next lines until the next code element is found, /// do not reset the last line if the next code element starts at the beginning of the line) /// When navigating to previous or next line searching for a code element, we can stop when a fresh insertion / update is encountered. /// When we reset a line which was not directly updated, and where the code element started in the middle of the line, /// we must "remember" at which token we must start parsing. /// In conclusion, the incremental parsing step for code elements is divided in two steps : /// 1. List all the starting and stop tokens of sections to parse with the rules above /// 2. Parse code elements beginning with starting token and until we reach stop token /// </summary> private static ParseSection CheckIfAdjacentLinesNeedRefresh(DocumentChangeType changeType, int lineIndex, ISearchableReadOnlyList <CodeElementsLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, IList <DocumentChange <ICodeElementsLine> > codeElementsLinesChanges, ParseSection lastParseSection) { ParseSection currentParseSection = new ParseSection(); // Navigate backwards to the start of the multiline code element if (lineIndex > 0) { int previousLineIndex = lineIndex; int lastLineIndexReset = lastParseSection != null ? lastParseSection.StopLineIndex : -1; IEnumerator <CodeElementsLine> reversedEnumerator = documentLines.GetEnumerator(previousLineIndex - 1, -1, true); bool previousLineHasCodeElements = false; while (reversedEnumerator.MoveNext() && (--previousLineIndex > lastLineIndexReset)) { // Get the previous line until the first code element is encountered CodeElementsLine previousLine = reversedEnumerator.Current; // The start of the parse section is delimited by the previous CodeElement if (previousLine != null) { previousLineHasCodeElements = previousLine.HasCodeElements; if (previousLineHasCodeElements) { currentParseSection.StartLineIndex = previousLineIndex; currentParseSection.StartToken = previousLine.CodeElements.First().ConsumedTokens.FirstOrDefault(); } // All lines contained in the parse section could be modified, and should be reset previousLine = (CodeElementsLine)prepareDocumentLineForUpdate(previousLineIndex, previousLine, CompilationStep.CodeElementsParser); previousLine.ResetCodeElements(); codeElementsLinesChanges.Add(new DocumentChange <ICodeElementsLine>(DocumentChangeType.LineUpdated, previousLineIndex, previousLine)); } // Stop iterating backwards as soon as the start of an old CodeElement is found if (previousLineHasCodeElements) { break; } } // If no CodeElement was found on the previous lines, current parse section starts at the beggining of the file // (because last parseSection could only stop at a CodeElement or at the end of the file) if (!previousLineHasCodeElements) { currentParseSection.StartLineIndex = 0; currentParseSection.StartToken = null; } } // If line 0 was updated, current parse section starts at the beggining of the file else { currentParseSection.StartLineIndex = 0; currentParseSection.StartToken = null; } // Navigate forwards to the end of the multiline code element if (lineIndex < (documentLines.Count - 1)) { int nextLineIndex = lineIndex; IEnumerator <CodeElementsLine> enumerator = documentLines.GetEnumerator(nextLineIndex + 1, -1, false); bool nextLineHasCodeElements = false; while (enumerator.MoveNext()) { // Get the next line until non continuation line is encountered nextLineIndex++; CodeElementsLine nextLine = enumerator.Current; // Check if the next CodeElement found starts at the beginning of the line if (nextLine != null) { nextLineHasCodeElements = nextLine.HasCodeElements; bool nextCodeElementStartsAtTheBeginningOfTheLine = false; if (nextLineHasCodeElements) { try { Token startTokenForNextParseSection = nextLine.CodeElements.First().ConsumedTokens.FirstOrDefault(); Token firstSourceTokenOfThisLine = nextLine.TokensWithCompilerDirectives.First(token => token.Channel == Token.CHANNEL_SourceTokens); nextCodeElementStartsAtTheBeginningOfTheLine = startTokenForNextParseSection == firstSourceTokenOfThisLine; } catch (System.InvalidOperationException /*e*/) {//JCM: 28/08/2017: I noticed that this Exception can occur if: it doesn't exists a token which verifies the predicate: token.Channel == Token.CHANNEL_SourceToken nextCodeElementStartsAtTheBeginningOfTheLine = false; } } // All lines contained in the parse section could be modified if (!nextCodeElementStartsAtTheBeginningOfTheLine) { // TO DO : ERROR below, will not work if we have source tokens from previous CodeElement + one other CodeElement on the same line // => the other CodeElement will be deleted by prepareDocumentLineForUpdate and not parsed again nextLine = (CodeElementsLine)prepareDocumentLineForUpdate(nextLineIndex, nextLine, CompilationStep.CodeElementsParser); codeElementsLinesChanges.Add(new DocumentChange <ICodeElementsLine>(DocumentChangeType.LineUpdated, nextLineIndex, nextLine)); } // Stop iterating forwards as soon as the start of an old CodeElement is found if (nextLineHasCodeElements) { currentParseSection.StopLineIndex = nextLineIndex; currentParseSection.StopToken = nextLine.CodeElements.First().ConsumedTokens.FirstOrDefault(); currentParseSection.StopTokenIsFirstTokenOfTheLine = nextCodeElementStartsAtTheBeginningOfTheLine; break; } } } // If no CodeElement was found on the next lines, current parse section current parse section ends at the end of the file if (!nextLineHasCodeElements) { currentParseSection.StopLineIndex = nextLineIndex; currentParseSection.StopToken = null; currentParseSection.StopTokenIsFirstTokenOfTheLine = false; } } // If last line was updated, or if no CodeElement was found after the updated line, current parse section ends at the end of the file else { currentParseSection.StopLineIndex = documentLines.Count - 1; currentParseSection.StopToken = null; currentParseSection.StopTokenIsFirstTokenOfTheLine = false; } return(currentParseSection); }
/// <summary> /// Incremental scan of a set of text lines changes /// </summary> internal static IList<DocumentChange<ITokensLine>> ScanTextLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<TokensLine> documentLines, IList<DocumentChange<ICobolTextLine>> textLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions) { // Collect all changes applied to the tokens lines during the incremental scan IList<DocumentChange<ITokensLine>> tokensLinesChanges = new List<DocumentChange<ITokensLine>>(); // There are 3 reasons to scan a line after a text change : // 1. New text lines which were just inserted or updated must be scanned for the first time // 2. Text lines must be scanned again if their initial scan state changed : a new scan of the previous line can alter the scan state at the beginning of the following line // 3. Continuation lines and multiline tokens : if a line participates in a continuation on several lines, scan the group of lines as a whole // IMPORTANT : the text changes are ordered in increasing order of line index for (int textChangeIndex = 0; textChangeIndex < textLinesChanges.Count; textChangeIndex++) { // Local variables used to optimize navigation in the document int nextLineToScanIndex = -1; TokensLine nextLineToScan = null; // Update tokens depending on the current text change DocumentChange<ICobolTextLine> textChange = textLinesChanges[textChangeIndex]; if (textChange.Type == DocumentChangeType.DocumentCleared) { tokensLinesChanges.Add(new DocumentChange<ITokensLine>(DocumentChangeType.DocumentCleared, 0, null)); continue; } else if (textChange.Type == DocumentChangeType.LineInserted || textChange.Type == DocumentChangeType.LineUpdated) { // We update lines as a group below, but we remember here which lines were inserted if(textChange.Type == DocumentChangeType.LineInserted) { tokensLinesChanges.Add(new DocumentChange<ITokensLine>(DocumentChangeType.LineInserted, textChange.LineIndex, (ITokensLine)textChange.NewLine)); } // Text lines which were inserted or updated must be scanned again ScanTokensLineWithMultilineScanState(textChange.LineIndex, (TokensLine)textChange.NewLine, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, tokensLinesChanges, out nextLineToScanIndex, out nextLineToScan); } else if (textChange.Type == DocumentChangeType.LineRemoved) { tokensLinesChanges.Add(new DocumentChange<ITokensLine>(DocumentChangeType.LineRemoved, textChange.LineIndex, (ITokensLine)textChange.NewLine)); // Get the last line just before the line that was removed TokensLine previousLine = null; if (textChange.LineIndex > 0) { previousLine = documentLines[textChange.LineIndex - 1]; } // When a text line is removed : // - the previous line must be scanned again if the line which was removed was a member of a multiline continuation group if (previousLine != null && previousLine.HasTokenContinuedOnNextLine) { ScanTokensLineWithMultilineScanState(textChange.LineIndex - 1, previousLine, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, tokensLinesChanges, out nextLineToScanIndex, out nextLineToScan); } if (nextLineToScan == null && textChange.LineIndex < documentLines.Count) { nextLineToScanIndex = textChange.LineIndex; nextLineToScan = documentLines[nextLineToScanIndex]; } // - the next line must be scanned again if the scan state at the end of the previous line is different from the scan state at the beginning of the next line if (nextLineToScan != null && nextLineToScanIndex == textChange.LineIndex && previousLine != null && nextLineToScan.InitialScanState.Equals(previousLine.ScanState)) { ScanTokensLineWithMultilineScanState(textChange.LineIndex, nextLineToScan, textSourceInfo, documentLines, prepareDocumentLineForUpdate, compilerOptions, tokensLinesChanges, out nextLineToScanIndex, out nextLineToScan); } } // We can skip all text changes with an index smaller than the index of the last line which was already scanned if (nextLineToScan == null) { break; } else if (textChangeIndex < (textLinesChanges.Count - 1)) { int nextTextChangeIndex = textChangeIndex; DocumentChange<ICobolTextLine> nextTextChange = null; do { nextTextChangeIndex++; nextTextChange = textLinesChanges[nextTextChangeIndex]; } while (nextTextChangeIndex < (textLinesChanges.Count - 1) && nextTextChange.LineIndex <= nextLineToScanIndex); textChangeIndex = nextTextChangeIndex - 1; } } return tokensLinesChanges; }
/// <summary> /// Incremental parsing of a set of processed tokens lines changes /// </summary> internal static IList <DocumentChange <ICodeElementsLine> > ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList <CodeElementsLine> documentLines, IList <DocumentChange <IProcessedTokensLine> > processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, PerfStatsForParserInvocation perfStatsForParserInvocation) { // Collect all changes applied to the processed tokens lines during the incremental scan IList <DocumentChange <ICodeElementsLine> > codeElementsLinesChanges = new List <DocumentChange <ICodeElementsLine> >(); // There are 2 reasons to re-parse a tokens line after a change : // 1. The tokens line changed : these lines were already reset during the previous steps // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed --- IList <ParseSection> refreshParseSections = null; ParseSection largestRefreshParseSection = null; // Iterate over all processed tokens changes detected by the PreprocessorStep : // - refresh all the adjacent lines participating in a CodeElement // - register the start and stop token for all sections of the document which need to be parsed again if (processedTokensLinesChanges != null && processedTokensLinesChanges.Count > 0) { // If the document was cleared, everything must be parsed again if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared) { refreshParseSections = new List <ParseSection>(); ParseSection lastParseSection = null; foreach (DocumentChange <IProcessedTokensLine> tokensChange in processedTokensLinesChanges) { if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex) { lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection); refreshParseSections.Add(lastParseSection); } } } } if (refreshParseSections != null) { //After getting all the parts refreshed, get the largest part that has been refreshed var minParseSection = refreshParseSections.OrderBy(p => p.StartLineIndex).First(); var maxParseSection = refreshParseSections.OrderByDescending(p => p.StopLineIndex).First(); largestRefreshParseSection = new ParseSection(minParseSection.StartLineIndex, minParseSection.StartToken, maxParseSection.StopLineIndex, maxParseSection.StopToken, maxParseSection.StopTokenIsFirstTokenOfTheLine); } // --- INITIALIZE ANTLR CodeElements parser --- // Create a token iterator on top of pre-processed tokens lines ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines); // Create an Antlr compatible token source on top of the token iterator TokensLinesTokenSource tokenSource = new TokensLinesTokenSource( textSourceInfo.Name, tokensIterator); // Init parser TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens); CodeElementsParser cobolParser = new CodeElementsParser(tokenStream); // REVERT TO STD PARSER ==> TracingCobolParser cobolParser = new TracingCobolParser(tokenStream); // Optionnaly activate Antlr Parser performance profiling // WARNING : use this in a single-treaded context only (uses static field) if (AntlrPerformanceProfiler == null && perfStatsForParserInvocation.ActivateDetailedAntlrPofiling) { AntlrPerformanceProfiler = new AntlrPerformanceProfiler(cobolParser); } if (AntlrPerformanceProfiler != null) { // Replace the generated parser by a subclass which traces all rules invocations cobolParser = new CodeElementsTracingParser(tokenStream); var tokensCountIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines); AntlrPerformanceProfiler.BeginParsingFile(textSourceInfo, tokensCountIterator); } // Customize error recovery strategy IAntlrErrorStrategy cobolErrorStrategy = new CodeElementErrorStrategy(); cobolParser.ErrorHandler = cobolErrorStrategy; // Register all parse errors in a list in memory ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener(); cobolParser.RemoveErrorListeners(); cobolParser.AddErrorListener(errorListener); // Prepare to analyze the parse tree ParseTreeWalker walker = new ParseTreeWalker(); CodeElementBuilder codeElementBuilder = new CodeElementBuilder(); codeElementBuilder.Dispatcher = new CodeElementDispatcher(); codeElementBuilder.Dispatcher.CreateListeners(); // --- INCREMENTAL PARSING --- // In case of incremental parsing, parse only the code sections we need to refresh if (largestRefreshParseSection != null) { // Seek just before the next code element starting token tokenStream.SeekToToken(largestRefreshParseSection.StartToken); tokenStream.StartLookingForStopToken(largestRefreshParseSection.StopToken); //Remove all the code elements for the future line to parse. for (int i = largestRefreshParseSection.StartLineIndex; i < (largestRefreshParseSection.StopLineIndex == documentLines.Count - 1 && largestRefreshParseSection.StopToken == null //If the last index is equals to number of line in document, make sure to also reset the last line, otherwise, reset lines normally. ? largestRefreshParseSection.StopLineIndex + 1 : largestRefreshParseSection.StopLineIndex); i++) { if (documentLines[i].CodeElements != null) { documentLines[i].ResetCodeElements(); } } } // Reset parsing error diagnostics cobolErrorStrategy.Reset(cobolParser); // Try to parse code elements : // - starting with the current parse section Start token // - ending with the current parse section Stop token CodeElementsParser.CobolCodeElementsContext codeElementsParseTree = null; try { perfStatsForParserInvocation.OnStartAntlrParsing(); if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.BeginParsingSection(); } codeElementsParseTree = cobolParser.cobolCodeElements(); if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.EndParsingSection(codeElementsParseTree.ChildCount); } perfStatsForParserInvocation.OnStopAntlrParsing( AntlrPerformanceProfiler != null ? (int)AntlrPerformanceProfiler.CurrentFileInfo.DecisionTimeMs : 0, AntlrPerformanceProfiler != null ? AntlrPerformanceProfiler.CurrentFileInfo.RuleInvocations.Sum() : 0); } catch (Exception e) { var currentToken = (Token)cobolParser.CurrentToken; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(currentToken); if (codeElementsLine != null) { codeElementsLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, currentToken, currentToken.Line, e)); } } if (codeElementsParseTree != null) { // If the parse tree is not empty if (codeElementsParseTree.codeElement() != null && codeElementsParseTree.codeElement().Length > 0) { // Analyze the parse tree for each code element foreach (var codeElementParseTree in codeElementsParseTree.codeElement()) { // Get the first line that was parsed var tokenStart = (Token)codeElementParseTree.Start; CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(tokenStart); if (codeElementsLine == null) { continue; } // Register that this line was updated // COMMENTED FOR THE SAKE OF PERFORMANCE -- SEE ISSUE #160 //int updatedLineIndex = documentLines.IndexOf(codeElementsLine, codeElementsLine.LineIndex); //codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLineIndex, codeElementsLine)); codeElementsLinesChanges.Add( new DocumentChange <ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.LineIndex, codeElementsLine)); perfStatsForParserInvocation.OnStartTreeBuilding(); // Visit the parse tree to build a first class object representing the code elements try { walker.Walk(codeElementBuilder, codeElementParseTree); } catch (Exception ex) { var code = MessageCode.ImplementationError; int line = 0; int start = 0; int stop = 0; if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0) { start = codeElementsLine.SourceTokens[0].StartIndex; stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count - 1].StopIndex; } codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code, ex)); } CodeElement codeElement = codeElementBuilder.CodeElement; if (codeElement != null) { // Attach consumed tokens and main document line numbers information to the code element if (codeElement.ConsumedTokens.Count == 0) { // ISSUE #204: var tempToken = tokenStream.Lt(1); if (tempToken != null && tempToken != Token.END_OF_FILE) { // if not end of file, // add next token to ConsumedTokens to know where is the CodeElement in error codeElement.ConsumedTokens.Add((Token)tempToken); // this alter CodeElements semantics: in addition to matched tokens, // it includes the first token in error if no token has been matched } } //TODO Issue #384 to discuss if this code should stay here: //This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called //Rule TCLIMITATION_NO_CE_ACROSS_SOURCES if (codeElement.IsAcrossSourceFile()) { DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation); } // Add code element to the list codeElementsLine.AddCodeElement(codeElement); } } } // If the parse tree contains errors if (codeElementsParseTree.Diagnostics != null) { foreach (ParserDiagnostic d in codeElementsParseTree.Diagnostics) { if (d.OffendingSymbol != null) { CodeElementsLine codeElementsLine = GetCodeElementsLineForToken((Token)d.OffendingSymbol); if (codeElementsLine != null) { codeElementsLine.AddParserDiagnostic(d); } } } } perfStatsForParserInvocation.OnStopTreeBuilding(); } if (AntlrPerformanceProfiler != null) { AntlrPerformanceProfiler.EndParsingFile(cobolParser.ParseInfo.DecisionInfo, (int)(cobolParser.ParseInfo.GetTotalTimeInPrediction() / 1000000)); } return(codeElementsLinesChanges); }
/// <summary> /// Illustration : lines with code elements continued from previous line or with a continuation on next line (before update) /// SW represents a code element starting word /// [ SW x] /// [ x] /// [SW x] /// [ SW ] /// [ x SW] /// A DocumentChange intersects with a previously parsed multiline code element if : /// * LineInserted : /// - all previous lines until the last starting word /// (get previous lines until the previous code element is found) /// - all the next lines until the next starting word /// (get next lines until the next code element is found, /// do not reset the last line if the next code element starts at the beginning of the line) /// * LineUpdated / LineRemoved : /// - all previous lines until the last starting word /// (get previous lines until the previous code element is found) /// - all the next lines until the next starting word /// (get next lines until the next code element is found, /// do not reset the last line if the next code element starts at the beginning of the line) /// When navigating to previous or next line searching for a code element, we can stop when a fresh insertion / update is encountered. /// When we reset a line which was not directly updated, and where the code element started in the middle of the line, /// we must "remember" at which token we must start parsing. /// In conclusion, the incremental parsing step for code elements is divided in two steps : /// 1. List all the starting and stop tokens of sections to parse with the rules above /// 2. Parse code elements beginning with starting token and until we reach stop token /// </summary> private static ParseSection CheckIfAdjacentLinesNeedRefresh(DocumentChangeType changeType, int lineIndex, ISearchableReadOnlyList<CodeElementsLine> documentLines, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, IList<DocumentChange<ICodeElementsLine>> codeElementsLinesChanges, ParseSection lastParseSection) { ParseSection currentParseSection = new ParseSection(); // Navigate backwards to the start of the multiline code element if (lineIndex > 0) { int previousLineIndex = lineIndex; int lastLineIndexReset = lastParseSection != null ? lastParseSection.StopLineIndex : - 1; IEnumerator<CodeElementsLine> reversedEnumerator = documentLines.GetEnumerator(previousLineIndex - 1, -1, true); while (reversedEnumerator.MoveNext() && (--previousLineIndex > lastLineIndexReset)) { // Get the previous line until the first code element is encountered CodeElementsLine previousLine = reversedEnumerator.Current; // The start of the parse section is delimited by the previous CodeElement bool previousLineHasCodeElements = previousLine.HasCodeElements; if (previousLineHasCodeElements) { currentParseSection.StartLineIndex = previousLineIndex; currentParseSection.StartToken = previousLine.CodeElements[0].ConsumedTokens[0]; } // All lines contained in the parse section could be modified, and should be reset previousLine = (CodeElementsLine)prepareDocumentLineForUpdate(previousLineIndex, previousLine, CompilationStep.CodeElementsParser); previousLine.ResetCodeElements(); codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, previousLineIndex, previousLine)); // Stop iterating backwards as soon as the start of an old CodeElement is found if (previousLineHasCodeElements) { break; } } } // Navigate forwards to the end of the multiline code element if (lineIndex < (documentLines.Count - 1)) { int nextLineIndex = lineIndex; IEnumerator<CodeElementsLine> enumerator = documentLines.GetEnumerator(nextLineIndex + 1, -1, true); while (enumerator.MoveNext()) { // Get the next line until non continuation line is encountered nextLineIndex++; CodeElementsLine nextLine = enumerator.Current; // Check if the next CodeElement found starts at the beginning of the line bool nextCodeElementStartsAtTheBeginningOfTheLine = false; if (nextLine.HasCodeElements) { Token startTokenForNextParseSection = nextLine.CodeElements[0].ConsumedTokens[0]; Token firstSourceTokenOfThisLine = nextLine.TokensWithCompilerDirectives.First(token => token.Channel == Token.CHANNEL_SourceTokens); nextCodeElementStartsAtTheBeginningOfTheLine = startTokenForNextParseSection == firstSourceTokenOfThisLine; } // All lines contained in the parse section could be modified if (!nextCodeElementStartsAtTheBeginningOfTheLine) { nextLine = (CodeElementsLine)prepareDocumentLineForUpdate(nextLineIndex, nextLine, CompilationStep.CodeElementsParser); codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, nextLineIndex, nextLine)); } // Stop iterating forwards as soon as the start of an old CodeElement is found if (nextLine.HasCodeElements) { if (!nextCodeElementStartsAtTheBeginningOfTheLine) { currentParseSection.StopLineIndex = nextLineIndex; currentParseSection.StopToken = nextLine.CodeElements[0].ConsumedTokens[0]; } else { currentParseSection.StopLineIndex = nextLineIndex; while (currentParseSection.StopToken == null && currentParseSection.StopLineIndex >= 1) { currentParseSection.StopLineIndex = currentParseSection.StopLineIndex - 1; currentParseSection.StopToken = documentLines[currentParseSection.StopLineIndex].TokensWithCompilerDirectives.Last(token => token.Channel == Token.CHANNEL_SourceTokens); } } currentParseSection.StopTokenIsLastTokenOfTheLine = nextCodeElementStartsAtTheBeginningOfTheLine; break; } } } // Current parse section ends with the updated line else { currentParseSection.StopLineIndex = lineIndex + 1; while (currentParseSection.StopToken == null && currentParseSection.StopLineIndex >= 1) { currentParseSection.StopLineIndex = currentParseSection.StopLineIndex - 1; currentParseSection.StopToken = documentLines[currentParseSection.StopLineIndex].TokensWithCompilerDirectives.Last(token => token.Channel == Token.CHANNEL_SourceTokens); } currentParseSection.StopTokenIsLastTokenOfTheLine = true; } return currentParseSection; }