/// <summary>
/// Checks that a token source created with an explicit start token returns the expected
/// tokens: the iteration starts on the first integer literal 88 found in the MSVCOUT sample.
/// </summary>
/// <exception cref="Exception">
/// Thrown when one of the inspected tokens (positions 0, 1, 7 and 8) does not have the expected text.
/// </exception>
public static void Check_CobolTokenSource_WithStartToken()
{
    // Test file properties
    string relativePath = @"Compiler\Parser\Samples";
    string textName = "MSVCOUT";
    DocumentFormat docFormat = DocumentFormat.RDZReferenceFormat;

    // Compile test file
    CompilationDocument compilationDocument = ParserUtils.ScanCobolFile(relativePath, textName, docFormat);

    // Search for the first integer literal 88 (level 88) and use it as the start token
    Token startToken = compilationDocument.TokensDocumentSnapshot.SourceTokens.First(
        t => (t.TokenType == TokenType.IntegerLiteral && ((IntegerLiteralTokenValue)t.LiteralValue).Number == 88));

    // Create a token iterator on top of tokens lines, positioned on the start token
    TokensLinesIterator tokensIterator = new TokensLinesIterator(
        compilationDocument.TokensDocumentSnapshot.TextSourceInfo.Name,
        compilationDocument.TokensDocumentSnapshot.Lines,
        startToken,
        Token.CHANNEL_SourceTokens);

    // Create an Antlr compatible token source on top of the token iterator
    TokensLinesTokenSource tokenSource = new TokensLinesTokenSource(
        compilationDocument.TokensDocumentSnapshot.TextSourceInfo.Name,
        tokensIterator);

    // Read the first 9 tokens returned by the token source
    IList<IToken> tokensList = new List<IToken>();
    for (int i = 0; i < 9; i++)
    {
        tokensList.Add(tokenSource.NextToken());
    }

    if (tokensList[0].Text != "88" ||
        tokensList[1].Text != ":MSVCOUT:-RtnCod-OK" ||
        tokensList[7].Text != "VALUE" ||
        tokensList[8].Text != "'STUB'")
    {
        throw new Exception("Token source nextToken method with start token KO");
    }
}
/// <summary>
/// Incremental parsing of the code elements impacted by a set of processed tokens lines changes.
/// </summary>
/// <param name="textSourceInfo">Source description, used to create the tokens iterators, to name the token source and to register the file in the profiler.</param>
/// <param name="documentLines">Code elements lines of the document.</param>
/// <param name="processedTokensLinesChanges">
/// Changes detected by the previous step. When null, empty, or when the first change is
/// DocumentCleared, no parse section is computed and the token stream is not repositioned.
/// </param>
/// <param name="prepareDocumentLineForUpdate">Callback forwarded to CheckIfAdjacentLinesNeedRefresh.</param>
/// <param name="compilerOptions">Not read by this method.</param>
/// <param name="perfStatsForParserInvocation">Receives the Antlr parsing / tree building start and stop notifications.</param>
/// <returns>The code elements lines changes collected during this pass.</returns>
internal static IList<DocumentChange<ICodeElementsLine>> ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<CodeElementsLine> documentLines, IList<DocumentChange<IProcessedTokensLine>> processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, PerfStatsForParserInvocation perfStatsForParserInvocation)
{
    // Every change applied to a code elements line during this pass is recorded here
    IList<DocumentChange<ICodeElementsLine>> lineChanges = new List<DocumentChange<ICodeElementsLine>>();

    // A line has to be parsed again either because it changed itself (already reset by the
    // previous steps), or because it belongs to a multiline code element in which another line changed.

    // --- PREPARATION : compute the single section of the document which must be parsed again ---

    ParseSection sectionToParse = null;
    bool restrictToChangedLines = processedTokensLinesChanges != null
                                  && processedTokensLinesChanges.Count > 0
                                  && processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared;
    if (restrictToChangedLines)
    {
        IList<ParseSection> touchedSections = new List<ParseSection>();
        ParseSection previousSection = null;
        foreach (DocumentChange<IProcessedTokensLine> change in processedTokensLinesChanges)
        {
            // A change located inside the previously computed section needs no new section
            bool startsNewSection = previousSection == null || change.LineIndex > previousSection.StopLineIndex;
            if (!startsNewSection)
            {
                continue;
            }

            previousSection = CheckIfAdjacentLinesNeedRefresh(change.Type, change.LineIndex, documentLines, prepareDocumentLineForUpdate, lineChanges, previousSection);
            touchedSections.Add(previousSection);
        }

        // Merge all the sections : from the lowest start line to the highest stop line
        ParseSection lowestSection = touchedSections.OrderBy(s => s.StartLineIndex).First();
        ParseSection highestSection = touchedSections.OrderByDescending(s => s.StopLineIndex).First();
        sectionToParse = new ParseSection(
            lowestSection.StartLineIndex, lowestSection.StartToken,
            highestSection.StopLineIndex, highestSection.StopToken, highestSection.StopTokenIsFirstTokenOfTheLine);
    }

    // --- ANTLR CodeElements parser setup ---

    // Tokens iterator over the pre-processed tokens lines, wrapped in an Antlr token source / stream
    ITokensLinesIterator processedTokens = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines);
    TokensLinesTokenSource antlrSource = new TokensLinesTokenSource(textSourceInfo.Name, processedTokens);
    TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(antlrSource, Token.CHANNEL_SourceTokens);
    CodeElementsParser parser = new CodeElementsParser(tokenStream);

    // Optional Antlr performance profiling
    // WARNING : single-threaded use only, the profiler is stored in a static field
    if (AntlrPerformanceProfiler == null && perfStatsForParserInvocation.ActivateDetailedAntlrPofiling)
    {
        AntlrPerformanceProfiler = new AntlrPerformanceProfiler(parser);
    }
    if (AntlrPerformanceProfiler != null)
    {
        // Use the parser subclass which traces all rules invocations instead of the generated parser
        parser = new CodeElementsTracingParser(tokenStream);

        var tokensCountIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines);
        AntlrPerformanceProfiler.BeginParsingFile(textSourceInfo, tokensCountIterator);
    }

    // Custom error recovery strategy
    IAntlrErrorStrategy errorStrategy = new CodeElementErrorStrategy();
    parser.ErrorHandler = errorStrategy;

    // Parse errors are collected by a dedicated listener, replacing the default ones
    ParserDiagnosticErrorListener diagnosticListener = new ParserDiagnosticErrorListener();
    parser.RemoveErrorListeners();
    parser.AddErrorListener(diagnosticListener);

    // Parse tree analysis tools
    ParseTreeWalker treeWalker = new ParseTreeWalker();
    CodeElementBuilder builder = new CodeElementBuilder();
    builder.Dispatcher = new CodeElementDispatcher();
    builder.Dispatcher.CreateListeners();

    // --- INCREMENTAL PARSING ---

    if (sectionToParse != null)
    {
        // Position the stream on the section start token and make it stop on the section stop token
        tokenStream.SeekToToken(sectionToParse.StartToken);
        tokenStream.StartLookingForStopToken(sectionToParse.StopToken);

        // Drop the code elements of the lines about to be parsed again.
        // The stop line is excluded, except when the section ends on the last line of the
        // document without a stop token : in that case the last line is reset too.
        bool endsOnLastDocumentLine = sectionToParse.StopLineIndex == documentLines.Count - 1
                                      && sectionToParse.StopToken == null;
        int resetEndExclusive = endsOnLastDocumentLine
            ? sectionToParse.StopLineIndex + 1
            : sectionToParse.StopLineIndex;
        for (int lineIndex = sectionToParse.StartLineIndex; lineIndex < resetEndExclusive; lineIndex++)
        {
            var staleLine = documentLines[lineIndex];
            if (staleLine.CodeElements != null)
            {
                staleLine.ResetCodeElements();
            }
        }
    }

    // Forget the errors of any previous parsing
    errorStrategy.Reset(parser);

    // Parse the code elements from the current stream position up to the stop token (if any)
    CodeElementsParser.CobolCodeElementsContext parseTree = null;
    try
    {
        perfStatsForParserInvocation.OnStartAntlrParsing();
        if (AntlrPerformanceProfiler != null)
        {
            AntlrPerformanceProfiler.BeginParsingSection();
        }

        parseTree = parser.cobolCodeElements();

        if (AntlrPerformanceProfiler != null)
        {
            AntlrPerformanceProfiler.EndParsingSection(parseTree.ChildCount);
        }
        perfStatsForParserInvocation.OnStopAntlrParsing(
            AntlrPerformanceProfiler != null ? (int)AntlrPerformanceProfiler.CurrentFileInfo.DecisionTimeMs : 0,
            AntlrPerformanceProfiler != null ? AntlrPerformanceProfiler.CurrentFileInfo.RuleInvocations.Sum() : 0);
    }
    catch (Exception e)
    {
        // Report the failure on the line of the token the parser was looking at, when it can be found
        var failingToken = (Token)parser.CurrentToken;
        CodeElementsLine failingLine = GetCodeElementsLineForToken(failingToken);
        if (failingLine != null)
        {
            failingLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, failingToken, failingToken.Line, e));
        }
    }

    if (parseTree != null)
    {
        // Build one CodeElement for each code element found in the parse tree
        var parsedCodeElements = parseTree.codeElement();
        if (parsedCodeElements != null)
        {
            foreach (var codeElementTree in parsedCodeElements)
            {
                // The code element is attached to the line of its first token
                var firstToken = (Token)codeElementTree.Start;
                CodeElementsLine targetLine = GetCodeElementsLineForToken(firstToken);
                if (targetLine == null)
                {
                    continue;
                }

                // Register that this line was updated.
                // The line index stored on the line is used directly instead of searching the
                // line in the document, for performance reasons (see issue #160).
                codeElementsLinesChangesAdd(lineChanges, targetLine);

                perfStatsForParserInvocation.OnStartTreeBuilding();

                // Walk the parse tree to build a first class object representing the code element
                try
                {
                    treeWalker.Walk(builder, codeElementTree);
                }
                catch (Exception ex)
                {
                    // Report the failure on the whole span of the source tokens of the line
                    var lineTokens = targetLine.SourceTokens;
                    bool lineHasTokens = lineTokens != null && lineTokens.Count > 0;
                    int line = 0;
                    int start = lineHasTokens ? lineTokens[0].StartIndex : 0;
                    int stop = lineHasTokens ? lineTokens[lineTokens.Count - 1].StopIndex : 0;
                    var code = MessageCode.ImplementationError;
                    targetLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code, ex));
                }

                CodeElement builtElement = builder.CodeElement;
                if (builtElement == null)
                {
                    continue;
                }

                if (builtElement.ConsumedTokens.Count == 0)
                {
                    // ISSUE #204 : when no token was matched and the end of file is not reached,
                    // the next token is added to ConsumedTokens to locate the CodeElement in error.
                    // This alters the CodeElement semantics : in addition to the matched tokens,
                    // ConsumedTokens then includes the first token in error.
                    var nextToken = tokenStream.Lt(1);
                    if (nextToken != null && nextToken != Token.END_OF_FILE)
                    {
                        builtElement.ConsumedTokens.Add((Token)nextToken);
                    }
                }

                // TODO Issue #384 : to discuss if this check should stay here.
                // It should be in a Checker, but "ConsumedTokens" is only set after all the checkers have been called.
                // Rule TCLIMITATION_NO_CE_ACROSS_SOURCES
                if (builtElement.IsAcrossSourceFile())
                {
                    DiagnosticUtils.AddError(builtElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation);
                }

                targetLine.AddCodeElement(builtElement);
            }
        }

        // Attach the parse tree errors to the line of their offending symbol
        if (parseTree.Diagnostics != null)
        {
            foreach (ParserDiagnostic treeDiagnostic in parseTree.Diagnostics)
            {
                if (treeDiagnostic.OffendingSymbol == null)
                {
                    continue;
                }

                CodeElementsLine offendingLine = GetCodeElementsLineForToken((Token)treeDiagnostic.OffendingSymbol);
                if (offendingLine != null)
                {
                    offendingLine.AddParserDiagnostic(treeDiagnostic);
                }
            }
        }

        perfStatsForParserInvocation.OnStopTreeBuilding();
    }

    if (AntlrPerformanceProfiler != null)
    {
        AntlrPerformanceProfiler.EndParsingFile(parser.ParseInfo.DecisionInfo, (int)(parser.ParseInfo.GetTotalTimeInPrediction() / 1000000));
    }

    return lineChanges;
}

/// <summary>
/// Records a LineUpdated change for the given code elements line, at the index stored on the line.
/// </summary>
private static void codeElementsLinesChangesAdd(IList<DocumentChange<ICodeElementsLine>> lineChanges, CodeElementsLine updatedLine)
{
    lineChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, updatedLine.LineIndex, updatedLine));
}
/// <summary>
/// Incremental preprocessing of a set of tokens lines changes :
/// parses the compiler directives found on the updated lines, then loads the documents
/// referenced by the new COPY directives.
/// </summary>
/// <param name="textSourceInfo">Source description, used to name the tokens iterator / token source and to register the file in the profiler.</param>
/// <param name="documentLines">Processed tokens lines of the document.</param>
/// <param name="tokensLinesChanges">Changes detected by the scanner step; may be null.</param>
/// <param name="prepareDocumentLineForUpdate">Callback forwarded to CheckIfAdjacentLinesNeedRefresh.</param>
/// <param name="compilerOptions">Options passed to the compiler directive builder.</param>
/// <param name="processedTokensDocumentProvider">Provider used to load the documents referenced by COPY directives.</param>
/// <param name="copyTextNameVariations">Passed to the compiler directive builder and to the document provider.</param>
/// <param name="perfStatsForParserInvocation">Receives the Antlr parsing / tree building start and stop notifications.</param>
/// <param name="missingCopies">
/// List of COPY directives which could not be loaded, updated by this method.
/// May be null, in which case missing copies are not tracked.
/// </param>
/// <returns>The processed tokens lines changes collected during this pass.</returns>
internal static IList<DocumentChange<IProcessedTokensLine>> ProcessTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<ProcessedTokensLine> documentLines, IList<DocumentChange<ITokensLine>> tokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions, IProcessedTokensDocumentProvider processedTokensDocumentProvider, List<RemarksDirective.TextNameVariation> copyTextNameVariations, PerfStatsForParserInvocation perfStatsForParserInvocation, List<CopyDirective> missingCopies)
{
    // Collect all changes applied to the processed tokens lines during the incremental scan
    IList<DocumentChange<IProcessedTokensLine>> processedTokensLinesChanges = new List<DocumentChange<IProcessedTokensLine>>();

    // There are 2 reasons to preprocess a tokens line after a change :
    // 1. A tokens line changed : these lines were already reset during the previous steps
    // 2. If a tokens line that changed was involved in the parsing of a multiline compiler directive, the whole group of lines must be parsed again
    // Then, if a new COPY directive was parsed : the CompilationDocument to include must be prepared

    // --- PREPARATION PHASE : reset all processed tokens lines which were involved in a multiline compiler directive where at least one line changed ---

    // Iterate over all tokens changes detected by the ScannerStep :
    // refresh all the adjacent lines participating in a ContinuationTokensGroup
    if (tokensLinesChanges != null)
    {
        int lastLineIndexReset = -1;
        foreach (DocumentChange<ITokensLine> tokensChange in tokensLinesChanges)
        {
            processedTokensLinesChanges.Add(new DocumentChange<IProcessedTokensLine>(tokensChange.Type, tokensChange.LineIndex, (IProcessedTokensLine)tokensChange.NewLine));
            if (tokensChange.LineIndex > lastLineIndexReset)
            {
                lastLineIndexReset = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, processedTokensLinesChanges, lastLineIndexReset);
            }
        }
    }

    // --- COMPILER DIRECTIVES PHASE : Find and parse all compiler directives ---

    // Init. Prepare a compiler directive parser

    // Create a token iterator on top of tokens lines
    TokensLinesIterator tokensIterator = new TokensLinesIterator(
        textSourceInfo.Name,
        documentLines,
        null,
        Token.CHANNEL_SourceTokens);

    // Create an Antlr compatible token source on top of the token iterator
    TokensLinesTokenSource tokenSource = new TokensLinesTokenSource(
        textSourceInfo.Name,
        tokensIterator);

    // Init a compiler directive parser
    CommonTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens);
    CobolCompilerDirectivesParser directivesParser = new CobolCompilerDirectivesParser(tokenStream);

    // Optionally activate Antlr Parser performance profiling
    // WARNING : use this in a single-threaded context only (uses static field)
    if (AntlrPerformanceProfiler == null && perfStatsForParserInvocation.ActivateDetailedAntlrPofiling)
    {
        AntlrPerformanceProfiler = new AntlrPerformanceProfiler(directivesParser);
    }
    if (AntlrPerformanceProfiler != null)
    {
        // Replace the generated parser by a subclass which traces all rules invocations
        directivesParser = new CobolCompilerDirectivesTracingParser(tokenStream);
        AntlrPerformanceProfiler.BeginParsingFile(textSourceInfo, null);
    }

    IAntlrErrorStrategy compilerDirectiveErrorStrategy = new CompilerDirectiveErrorStrategy();
    directivesParser.ErrorHandler = compilerDirectiveErrorStrategy;

    // Register all parse errors in a list in memory
    ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener();
    directivesParser.RemoveErrorListeners();
    directivesParser.AddErrorListener(errorListener);

    // Prepare to analyze the parse tree
    ParseTreeWalker walker = new ParseTreeWalker();
    CompilerDirectiveBuilder directiveBuilder = new CompilerDirectiveBuilder(compilerOptions, copyTextNameVariations);

    // 1. Iterate over all compiler directive starting tokens found in the lines which were updated
    foreach (Token compilerDirectiveStartingToken in documentLines
        .Where(line => line.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing)
        .SelectMany(line => line.SourceTokens)
        .Where(token => token.TokenFamily == TokenFamily.CompilerDirectiveStartingKeyword))
    {
        // 2. Reset the compiler directive parser state

        // Reset tokens iterator position before parsing
        // -> seek just before the compiler directive starting token
        tokensIterator.SeekToToken(compilerDirectiveStartingToken);
        tokensIterator.PreviousToken();
        // Special case : for efficiency reasons, in EXEC SQL INCLUDE directives
        // only the third token INCLUDE is recognized as a compiler directive
        // starting keyword by the scanner. In this case, we must rewind the
        // iterator two tokens backwards to start parsing just before the EXEC token.
        if (compilerDirectiveStartingToken.TokenType == TokenType.EXEC_SQL_INCLUDE)
        {
            tokensIterator.PreviousToken();
            tokensIterator.PreviousToken();
        }

        // Reset Antlr BufferedTokenStream position
        tokenStream.SetTokenSource(tokenSource);

        // Reset parsing error diagnostics
        compilerDirectiveErrorStrategy.Reset(directivesParser);

        // 3. Try to parse a compiler directive starting with the current token
        perfStatsForParserInvocation.OnStartAntlrParsing();
        if (AntlrPerformanceProfiler != null)
        {
            AntlrPerformanceProfiler.BeginParsingSection();
        }
        CobolCompilerDirectivesParser.CompilerDirectingStatementContext directiveParseTree = directivesParser.compilerDirectingStatement();
        if (AntlrPerformanceProfiler != null)
        {
            AntlrPerformanceProfiler.EndParsingSection(directiveParseTree.ChildCount);
        }
        perfStatsForParserInvocation.OnStopAntlrParsing(
            AntlrPerformanceProfiler != null ? (int)AntlrPerformanceProfiler.CurrentFileInfo.DecisionTimeMs : 0,
            AntlrPerformanceProfiler != null ? AntlrPerformanceProfiler.CurrentFileInfo.RuleInvocations.Sum() : 0);

        perfStatsForParserInvocation.OnStartTreeBuilding();
        // 4. Visit the parse tree to build a first class object representing the compiler directive
        walker.Walk(directiveBuilder, directiveParseTree);
        CompilerDirective compilerDirective = directiveBuilder.CompilerDirective;
        bool errorFoundWhileParsingDirective = compilerDirective == null || compilerDirective.Diagnostics != null || directiveParseTree.Diagnostics != null;

        // 5. Get all tokens consumed while parsing the compiler directive
        //    and partition them line by line
        Token startToken = (Token)directiveParseTree.Start;
        Token stopToken = (Token)directiveParseTree.Stop;
        if (stopToken == null)
        {
            stopToken = startToken;
        }
        MultilineTokensGroupSelection tokensSelection = tokensIterator.SelectAllTokensBetween(startToken, stopToken);

        if (compilerDirective != null)
        {
            // 6. Replace all matched tokens by :
            // - a CompilerDirectiveToken on the first line
            ProcessedTokensLine firstProcessedTokensLine = documentLines[tokensSelection.FirstLineIndex];
            if (tokensSelection.SelectedTokensOnSeveralLines.Length == 1)
            {
                firstProcessedTokensLine.InsertCompilerDirectiveTokenOnFirstLine(
                    tokensSelection.TokensOnFirstLineBeforeStartToken,
                    compilerDirective, errorFoundWhileParsingDirective,
                    tokensSelection.SelectedTokensOnSeveralLines[0],
                    tokensSelection.TokensOnLastLineAfterStopToken, false);
            }
            else
            {
                TokensGroup continuedTokensGroup = firstProcessedTokensLine.InsertCompilerDirectiveTokenOnFirstLine(
                    tokensSelection.TokensOnFirstLineBeforeStartToken,
                    compilerDirective, errorFoundWhileParsingDirective,
                    tokensSelection.SelectedTokensOnSeveralLines[0],
                    null, true);

                // - a ContinuationTokensGroup on the following lines
                int selectionLineIndex = 1;
                int lastLineIndex = tokensSelection.FirstLineIndex + tokensSelection.SelectedTokensOnSeveralLines.Length - 1;
                for (int nextLineIndex = tokensSelection.FirstLineIndex + 1; nextLineIndex <= lastLineIndex; nextLineIndex++, selectionLineIndex++)
                {
                    IList<Token> compilerDirectiveTokensOnNextLine = tokensSelection.SelectedTokensOnSeveralLines[selectionLineIndex];
                    if (compilerDirectiveTokensOnNextLine.Count > 0)
                    {
                        ProcessedTokensLine nextProcessedTokensLine = documentLines[nextLineIndex];
                        if (nextLineIndex != lastLineIndex)
                        {
                            continuedTokensGroup = nextProcessedTokensLine.InsertCompilerDirectiveTokenOnNextLine(
                                continuedTokensGroup,
                                compilerDirectiveTokensOnNextLine,
                                null, true);
                        }
                        else
                        {
                            // Only the last line keeps the tokens found after the stop token
                            continuedTokensGroup = nextProcessedTokensLine.InsertCompilerDirectiveTokenOnNextLine(
                                continuedTokensGroup,
                                compilerDirectiveTokensOnNextLine,
                                tokensSelection.TokensOnLastLineAfterStopToken, false);
                        }
                    }
                }
            }
        }

        // 7. Register compiler directive parse errors
        if (errorFoundWhileParsingDirective)
        {
            ProcessedTokensLine compilerDirectiveLine = documentLines[tokensSelection.FirstLineIndex];
            if (compilerDirective != null && compilerDirective.Diagnostics != null)
            {
                foreach (Diagnostic directiveDiag in compilerDirective.Diagnostics)
                {
                    compilerDirectiveLine.AddDiagnostic(directiveDiag);
                }
            }
            else if (directiveParseTree.Diagnostics != null)
            {
                foreach (Diagnostic directiveDiag in directiveParseTree.Diagnostics)
                {
                    if (compilerDirective != null)
                    {
                        compilerDirective.AddDiagnostic(directiveDiag);
                    }
                    compilerDirectiveLine.AddDiagnostic(directiveDiag);
                }
            }
        }
    }

    if (AntlrPerformanceProfiler != null)
    {
        AntlrPerformanceProfiler.EndParsingFile(directivesParser.ParseInfo.DecisionInfo, (int)(directivesParser.ParseInfo.GetTotalTimeInPrediction() / 1000000));
    }

    // 8. Advance the state of all ProcessedTokensLines :
    // NeedsCompilerDirectiveParsing => NeedsCopyDirectiveProcessing if it contains a COPY directive
    // NeedsCompilerDirectiveParsing => Ready otherwise
    IList<ProcessedTokensLine> parsedLinesWithCopyDirectives = null;
    foreach (ProcessedTokensLine parsedLine in documentLines
        .Where(line => line.PreprocessingState == ProcessedTokensLine.PreprocessorState.NeedsCompilerDirectiveParsing))
    {
        if (parsedLine.ImportedDocuments != null)
        {
            if (parsedLinesWithCopyDirectives == null)
            {
                parsedLinesWithCopyDirectives = new List<ProcessedTokensLine>();
            }
            parsedLine.PreprocessingState = ProcessedTokensLine.PreprocessorState.NeedsCopyDirectiveProcessing;
            parsedLinesWithCopyDirectives.Add(parsedLine);
        }
        else
        {
            parsedLine.PreprocessingState = ProcessedTokensLine.PreprocessorState.Ready;
        }
    }

    perfStatsForParserInvocation.OnStopTreeBuilding();

    // --- COPY IMPORT PHASE : Process COPY (REPLACING) directives ---

    // Forget the missing copy registered on each changed line (COPY token lines are 1-based,
    // change line indexes are 0-based). The list is optional, exactly as in the failure
    // handler below : without this guard a null list made the method throw a
    // NullReferenceException as soon as one line changed.
    if (missingCopies != null)
    {
        foreach (var lineChange in processedTokensLinesChanges)
        {
            missingCopies.Remove(missingCopies.FirstOrDefault(c => c.COPYToken.Line == lineChange.LineIndex + 1));
        }
    }

    // 1. Iterate over all updated lines containing a new COPY directive
    if (parsedLinesWithCopyDirectives != null)
    {
        foreach (ProcessedTokensLine tokensLineWithCopyDirective in parsedLinesWithCopyDirectives)
        {
            // Iterate over all COPY directives found on one updated line.
            // ToArray() takes a snapshot of the keys : ImportedDocuments is written inside the loop.
            foreach (CopyDirective copyDirective in tokensLineWithCopyDirective.ImportedDocuments.Keys.Where(c => c.TextName != null || c.COPYToken.TokenType == TokenType.EXEC).ToArray())
            {
                try
                {
                    PerfStatsForImportedDocument perfStats;

                    // Load (or retrieve in cache) the document referenced by the COPY directive
                    // Issue #315: tokensLineWithCopyDirective.ScanState must be passed because special names paragraph
                    // such as "Decimal point is comma" are declared in the enclosing program and can affect the parsing of COPY
                    ProcessedTokensDocument importedDocumentSource = processedTokensDocumentProvider.GetProcessedTokensDocument(copyDirective.LibraryName, copyDirective.TextName, tokensLineWithCopyDirective.ScanStateBeforeCOPYToken[copyDirective.COPYToken], copyTextNameVariations, out perfStats);

                    // Store it on the current line after applying the REPLACING directive
                    ImportedTokensDocument importedDocument = new ImportedTokensDocument(copyDirective, importedDocumentSource, perfStats);
                    tokensLineWithCopyDirective.ImportedDocuments[copyDirective] = importedDocument;
                }
                catch (Exception e)
                {
                    // If the list already contains the copy directive, just ignore it
                    if (missingCopies != null && copyDirective != null && copyDirective.COPYToken != null && !missingCopies.Contains(copyDirective))
                    {
                        // Replace the missing copy previously registered on the same line, if any
                        var missingCopieToReplace = missingCopies.FirstOrDefault(c => c.COPYToken.Line == copyDirective.COPYToken.Line);
                        missingCopies.Remove(missingCopieToReplace);
                        missingCopies.Add(copyDirective);
                    }

                    // Text name referenced by COPY directive was not found
                    // => register a preprocessor error on this line
                    Token failedDirectiveToken = tokensLineWithCopyDirective.TokensWithCompilerDirectives
                        .First(token => token.TokenType == TokenType.CopyImportDirective && ((CompilerDirectiveToken)token).CompilerDirective == copyDirective);
                    Diagnostic diag = new Diagnostic(
                        MessageCode.FailedToLoadTextDocumentReferencedByCopyDirective,
                        failedDirectiveToken.Column, failedDirectiveToken.EndColumn,
                        failedDirectiveToken.Line, e.Message, e);
                    tokensLineWithCopyDirective.AddDiagnostic(diag);
                }
            }

            // Advance processing status of the line
            tokensLineWithCopyDirective.PreprocessingState = ProcessedTokensLine.PreprocessorState.Ready;
        }
    }

    // --- REPLACE PHASE : REPLACE directives are implemented in ReplaceTokensLinesIterator ---

    /* Algorithm :
     *
     * one REPLACE directive can express several replacement operations
     * one replacement operation can be of several types (distinguished for optimization purposes)
     * - SimpleTokenReplace : one source token / zero or one replacement token
     * - PartialWordReplace : one pure partial word / zero or one replacement token
     * - SimpleToMultipleTokenReplace : one source token / several replacement tokens
     * - MultipleTokenReplace : one first + several following source tokens / zero to many replacement tokens
     *
     * an iterator maintains a current set of replacement operations
     *
     * if nextToken is replace directive
     *    the current set of replacement operations is updated
     * else
     *    nextToken is compared to each replacement operation in turn
     *       if single -> single source token operation : return replacement token
     *       if single -> multiple operation : setup a secondary iterator with the list of replacement tokens
     *       if multiple -> multiple operation
     *          snapshot of the underlying iterator
     *          try to match all of the following source tokens
     *          if failure : restore snapshot and try next operation
     *          if success : setup a secondary iterator
     *
     * token comparison sourceToken / replacementCandidate :
     * 1. Compare Token type
     * 2. If same token type and for families
     *   AlphanumericLiteral
     *   NumericLiteral
     *   SyntaxLiteral
     *   Symbol
     *  => compare also Token text
     *
     * PartialCobolWord replacement :
     * p535 : The COPY statement with REPLACING phrase can be used to replace parts of words.
     * By inserting a dummy operand delimited by colons into the program text,
     * the compiler will replace the dummy operand with the required text.
     * Example 3 shows how this is used with the dummy operand :TAG:.
     * The colons serve as separators and make TAG a stand-alone operand.
     */

    return processedTokensLinesChanges;
}
/// <summary>
/// Incremental parsing of a set of processed tokens lines changes.
/// </summary>
/// <param name="textSourceInfo">Source description, used to create the tokens iterator and to name the token source.</param>
/// <param name="documentLines">Code elements lines of the document.</param>
/// <param name="processedTokensLinesChanges">
/// Changes detected by the preprocessor step. When null, empty, or when the first change is
/// DocumentCleared, no parse section is computed and parsing starts at the current stream position.
/// </param>
/// <param name="prepareDocumentLineForUpdate">Callback forwarded to CheckIfAdjacentLinesNeedRefresh.</param>
/// <param name="compilerOptions">Not read by this method.</param>
/// <returns>The code elements lines changes collected during this pass.</returns>
internal static IList<DocumentChange<ICodeElementsLine>> ParseProcessedTokensLinesChanges(TextSourceInfo textSourceInfo, ISearchableReadOnlyList<CodeElementsLine> documentLines, IList<DocumentChange<IProcessedTokensLine>> processedTokensLinesChanges, PrepareDocumentLineForUpdate prepareDocumentLineForUpdate, TypeCobolOptions compilerOptions)
{
    // Collect all changes applied to the processed tokens lines during the incremental scan
    IList<DocumentChange<ICodeElementsLine>> codeElementsLinesChanges = new List<DocumentChange<ICodeElementsLine>>();

    // There are 2 reasons to re-parse a tokens line after a change :
    // 1. The tokens line changed : these lines were already reset during the previous steps
    // 2. If a tokens line that changed was involved in the parsing of a multiline code element, the whole group of lines must be parsed again

    // --- PREPARATION PHASE : identify all parse sections where code elements need to be refreshed ---

    IList<ParseSection> refreshParseSections = null;

    // Iterate over all processed tokens changes detected by the PreprocessorStep :
    // - refresh all the adjacent lines participating in a CodeElement
    // - register the start and stop token for all sections of the document which need to be parsed again
    // An empty list of changes is handled like a null one : reading its first element would throw.
    if (processedTokensLinesChanges != null && processedTokensLinesChanges.Count > 0)
    {
        // If the document was cleared, everything must be parsed again
        if (processedTokensLinesChanges[0].Type != DocumentChangeType.DocumentCleared)
        {
            refreshParseSections = new List<ParseSection>();

            ParseSection lastParseSection = null;
            foreach (DocumentChange<IProcessedTokensLine> tokensChange in processedTokensLinesChanges)
            {
                if (lastParseSection == null || tokensChange.LineIndex > lastParseSection.StopLineIndex)
                {
                    lastParseSection = CheckIfAdjacentLinesNeedRefresh(tokensChange.Type, tokensChange.LineIndex, documentLines, prepareDocumentLineForUpdate, codeElementsLinesChanges, lastParseSection);
                    refreshParseSections.Add(lastParseSection);
                }
            }
        }
    }

    // --- INITIALIZE ANTLR CodeElements parser ---

    // Create a token iterator on top of pre-processed tokens lines
    ITokensLinesIterator tokensIterator = ProcessedTokensDocument.GetProcessedTokensIterator(textSourceInfo, documentLines);

    // Create an Antlr compatible token source on top of the token iterator
    TokensLinesTokenSource tokenSource = new TokensLinesTokenSource(
        textSourceInfo.Name,
        tokensIterator);

    // Init parser
    TokensLinesTokenStream tokenStream = new TokensLinesTokenStream(tokenSource, Token.CHANNEL_SourceTokens);
    CodeElementsParser cobolParser = new CodeElementsParser(tokenStream);

    // Customize error recovery strategy
    IAntlrErrorStrategy cobolErrorStrategy = new CodeElementErrorStrategy();
    cobolParser.ErrorHandler = cobolErrorStrategy;

    // Register all parse errors in a list in memory
    ParserDiagnosticErrorListener errorListener = new ParserDiagnosticErrorListener();
    cobolParser.RemoveErrorListeners();
    cobolParser.AddErrorListener(errorListener);

    // Prepare to analyze the parse tree
    ParseTreeWalker walker = new ParseTreeWalker();
    CodeElementBuilder codeElementBuilder = new CodeElementBuilder();
    codeElementBuilder.Dispatcher = new CodeElementDispatcher();
    codeElementBuilder.Dispatcher.CreateListeners();

    // --- INCREMENTAL PARSING ---

    // In case of incremental parsing, parse only the code sections we need to refresh
    IEnumerator<ParseSection> parseSectionsEnumerator = null;
    ParseSection currentParseSection = null;
    if (refreshParseSections != null)
    {
        // Get the first code section we need to refresh
        parseSectionsEnumerator = refreshParseSections.GetEnumerator();
        parseSectionsEnumerator.MoveNext();
        currentParseSection = parseSectionsEnumerator.Current;

        // Seek just before the next code element starting token
        tokenStream.SeekToToken(currentParseSection.StartToken);
        tokenStream.StartLookingForStopToken(currentParseSection.StopToken);
    }

    // Parse a list of code elements for each parse section while advancing in the underlying token stream
    do
    {
        // Reset parsing error diagnostics
        cobolErrorStrategy.Reset(cobolParser);

        // Try to parse code elements :
        // - starting with the current parse section Start token
        // - ending with the current parse section Stop token
        CodeElementsParser.CobolCodeElementsContext codeElementsParseTree = null;
        try
        {
            codeElementsParseTree = cobolParser.cobolCodeElements();
        }
        catch (Exception e)
        {
            var currentToken = (Token)cobolParser.CurrentToken;
            CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(currentToken);
            if (codeElementsLine == null)
            {
                // No line to attach the diagnostic to : let the original parser exception
                // propagate instead of masking it with a NullReferenceException
                throw;
            }
            codeElementsLine.AddParserDiagnostic(new TokenDiagnostic(MessageCode.ImplementationError, currentToken, currentToken.Line, e));
        }

        if (codeElementsParseTree != null)
        {
            // If the parse tree is not empty
            if (codeElementsParseTree.codeElement() != null && codeElementsParseTree.codeElement().Length > 0)
            {
                // Analyze the parse tree for each code element
                foreach (var codeElementParseTree in codeElementsParseTree.codeElement())
                {
                    // Get the first line that was parsed
                    var tokenStart = (Token)codeElementParseTree.Start;
                    CodeElementsLine codeElementsLine = GetCodeElementsLineForToken(tokenStart);
                    if (codeElementsLine == null)
                    {
                        // The code element cannot be attached to any line : skip it
                        continue;
                    }

                    // Register that this line was updated.
                    // The line index stored on the line is used directly instead of searching the
                    // line in the document, for performance reasons (see issue #160).
                    codeElementsLinesChanges.Add(new DocumentChange<ICodeElementsLine>(DocumentChangeType.LineUpdated, codeElementsLine.InitialLineIndex, codeElementsLine));

                    // Visit the parse tree to build a first class object representing the code elements
                    try
                    {
                        walker.Walk(codeElementBuilder, codeElementParseTree);
                    }
                    catch (Exception ex)
                    {
                        // Report the failure on the whole span of the source tokens of the line
                        var code = MessageCode.ImplementationError;
                        int line = 0;
                        int start = 0;
                        int stop = 0;
                        if (codeElementsLine.SourceTokens != null && codeElementsLine.SourceTokens.Count > 0)
                        {
                            start = codeElementsLine.SourceTokens[0].StartIndex;
                            stop = codeElementsLine.SourceTokens[codeElementsLine.SourceTokens.Count - 1].StopIndex;
                        }
                        codeElementsLine.AddParserDiagnostic(new ParserDiagnostic(ex.ToString(), start, stop, line, null, code));
                    }
                    CodeElement codeElement = codeElementBuilder.CodeElement;
                    if (codeElement != null)
                    {
                        // Attach consumed tokens and main document line numbers information to the code element
                        if (codeElement.ConsumedTokens.Count == 0)
                        {
                            // ISSUE #204 : if a next token is available, add it to ConsumedTokens
                            // to know where the CodeElement in error is located.
                            // This alters CodeElements semantics : in addition to matched tokens,
                            // it includes the first token in error if no token has been matched.
                            var nextToken = tokenStream.Lt(1);
                            if (nextToken != null)
                            {
                                codeElement.ConsumedTokens.Add((Token)nextToken);
                            }
                        }

                        // TODO Issue #384 to discuss if this code should stay here:
                        // This should be in a Checker, but "codeElement.ConsumedTokens" is only set after all the checkers have been called
                        // Rule TCLIMITATION_NO_CE_ACROSS_SOURCES
                        if (codeElement.IsAcrossSourceFile())
                        {
                            DiagnosticUtils.AddError(codeElement, "A Cobol statement cannot be across 2 sources files (eg. Main program and a COPY)", MessageCode.TypeCobolParserLimitation);
                        }

                        // Add code element to the list
                        codeElementsLine.AddCodeElement(codeElement);
                        if (codeElement.Diagnostics != null)
                        {
                            foreach (Diagnostic d in codeElement.Diagnostics)
                            {
                                codeElementsLine.AddParserDiagnostic(d);
                            }
                        }
                    }
                }
            }
            // If the parse tree contains errors
            if (codeElementsParseTree.Diagnostics != null)
            {
                foreach (ParserDiagnostic d in codeElementsParseTree.Diagnostics)
                {
                    if (d.OffendingSymbol != null)
                    {
                        CodeElementsLine codeElementsLine = GetCodeElementsLineForToken((Token)d.OffendingSymbol);
                        if (codeElementsLine != null)
                        {
                            codeElementsLine.AddParserDiagnostic(d);
                        }
                    }
                }
            }
        }

        // In case of incremental parsing, directly jump to next parse section in the token stream
        // Else, simply start parsing the next CodeElement beginning with the next token
        if (currentParseSection != null)
        {
            // Advance to the next ParseSection
            if (parseSectionsEnumerator.MoveNext())
            {
                currentParseSection = parseSectionsEnumerator.Current;
                tokenStream.SeekToToken(currentParseSection.StartToken);
                tokenStream.StartLookingForStopToken(currentParseSection.StopToken);
            }
            // No more section to parse
            else
            {
                break;
            }
        }
    }
    while (tokenStream.La(1) >= 0);

    return codeElementsLinesChanges;
}
/// <summary>
/// Checks the ICharStream exposed by a TokensLinesTokenSource built on the scanned
/// "MSVCOUT" sample: Index, La, Consume, Seek, Mark/Release, GetText and Size.
/// Throws an Exception naming the first expectation that does not hold.
/// </summary>
public static void Check_CobolCharStream()
{
    // Location and format of the sample under test
    string sampleDirectory = @"Compiler\Parser\Samples";
    string sampleName = "MSVCOUT";
    DocumentFormat format = DocumentFormat.RDZReferenceFormat;

    // Scan the sample, then keep a handle on the tokens snapshot and its source name
    CompilationDocument document = ParserUtils.ScanCobolFile(sampleDirectory, sampleName, format);
    var snapshot = document.TokensDocumentSnapshot;
    var sourceName = snapshot.TextSourceInfo.Name;

    // Iterator over the tokens lines (no start token, source tokens channel)
    TokensLinesIterator linesIterator = new TokensLinesIterator(
        sourceName,
        snapshot.Lines,
        null,
        Token.CHANNEL_SourceTokens);

    // Create an Antlr compatible token source on top of the token iterator
    TokensLinesTokenSource tokenSource = new TokensLinesTokenSource(sourceName, linesIterator);

    // Read a first token before asking for the underlying char stream
    tokenSource.NextToken();
    ICharStream charStream = tokenSource.InputStream;

    // --- Initial position and lookahead ---
    if (charStream.Index != 0)
    {
        throw new Exception("Char stream index should start at 0");
    }
    if (charStream.La(0) != 0)
    {
        throw new Exception("La(0) should be 0");
    }
    if (charStream.La(1) != '0')
    {
        throw new Exception("La(1) should be 0");
    }
    if (charStream.La(4) != '1')
    {
        throw new Exception("La(4) should be 1");
    }
    if (charStream.La(5) != '6')
    {
        throw new Exception("La(5) should be 6");
    }

    // --- Consume one character ---
    charStream.Consume();
    if (charStream.Index != 1)
    {
        throw new Exception("Char stream index should be 1 after consume");
    }
    if (charStream.La(4) != '6')
    {
        throw new Exception("La(4) should be 6 after consume");
    }
    if (charStream.La(80) != IntStreamConstants.Eof)
    {
        throw new Exception("La(80) should be Eof");
    }

    // --- Seek to an absolute index ---
    charStream.Seek(12);
    if (charStream.Index != 12)
    {
        throw new Exception("Char stream index should be 12 after seek");
    }
    if (charStream.La(-1) != ':')
    {
        throw new Exception("La(-1) should be : after seek");
    }
    if (charStream.La(1) != 'M')
    {
        throw new Exception("La(1) should be M after seek");
    }

    // --- Mark immediately followed by Release: expected to do nothing ---
    charStream.Release(charStream.Mark());
    if (charStream.La(2) != 'S')
    {
        throw new Exception("La(2) should be S after release");
    }

    // --- Text extraction and total size ---
    if (charStream.GetText(new Interval(11, 18)) != ":MSVCOUT")
    {
        throw new Exception("Char stream GetText method KO");
    }
    if (charStream.Size != 80)
    {
        throw new Exception("Char stream size KO");
    }
}
/// <summary>
/// Checks a TokensLinesTokenSource built on the scanned "MSVCOUT" sample:
/// SourceName, TokenFactory.Create, the initial Column and Line, and the sequence
/// returned by NextToken up to the end-of-file token.
/// Throws an Exception naming the first expectation that does not hold.
/// </summary>
public static void Check_CobolTokenSource()
{
    // Location and format of the sample under test
    string sampleDirectory = @"Compiler\Parser\Samples";
    string sampleName = "MSVCOUT";
    DocumentFormat format = DocumentFormat.RDZReferenceFormat;

    // Scan the sample, then keep a handle on the tokens snapshot and its source name
    CompilationDocument document = ParserUtils.ScanCobolFile(sampleDirectory, sampleName, format);
    var snapshot = document.TokensDocumentSnapshot;
    var sourceName = snapshot.TextSourceInfo.Name;

    // Iterator over the tokens lines (no start token, source tokens channel)
    TokensLinesIterator linesIterator = new TokensLinesIterator(
        sourceName,
        snapshot.Lines,
        null,
        Token.CHANNEL_SourceTokens);

    // Create an Antlr compatible token source on top of the token iterator
    TokensLinesTokenSource tokenSource = new TokensLinesTokenSource(sourceName, linesIterator);

    if (tokenSource.SourceName != "MSVCOUT")
    {
        throw new Exception("Token source name KO");
    }

    // Build a token through the factory and verify each of its properties
    Tuple<ITokenSource, ICharStream> sourcePair =
        new Tuple<ITokenSource, ICharStream>(tokenSource, tokenSource.InputStream);
    IToken currentToken = tokenSource.TokenFactory.Create(
        sourcePair,
        (int)TokenType.IntegerLiteral,
        "314",
        Token.CHANNEL_CompilerDirectives,
        10, 20, 30, 17);

    // Checked left to right; evaluation stops at the first property that does not match
    bool factoryTokenIsValid =
        currentToken.Channel == Token.CHANNEL_CompilerDirectives
        && currentToken.Column == 18
        && currentToken.Line == 1
        && currentToken.StartIndex == 17
        && currentToken.StopIndex == 16
        && currentToken.Text == "314"
        && currentToken.TokenIndex == -1
        && currentToken.InputStream != null
        && currentToken.TokenSource != null
        && currentToken.Type == (int)TokenType.IntegerLiteral
        && ((IntegerLiteralTokenValue)((Token)currentToken).LiteralValue).Number == 314;
    if (!factoryTokenIsValid)
    {
        throw new Exception("TokenFactory second Create method KO");
    }

    // Position reported by the token source before any token has been read
    if (tokenSource.Column != 0)
    {
        throw new Exception("Token source column should be 0 at start");
    }
    if (tokenSource.Line != 1)
    {
        throw new Exception("Token source line should be 1 at start");
    }

    // Collect every token until the last one read is the end-of-file token.
    // The first test of the loop condition is made on the factory-built token above.
    IList<IToken> collectedTokens = new List<IToken>();
    while (currentToken.Type != (int)TokenType.EndOfFile)
    {
        currentToken = tokenSource.NextToken();
        collectedTokens.Add(currentToken);
    }

    // Spot-check the collected sequence: total count, first two tokens and the tail
    bool tokenSequenceIsValid =
        collectedTokens.Count == 293
        && collectedTokens[0].Text == "01"
        && collectedTokens[1].Text == ":MSVCOUT:"
        && collectedTokens[290].Text == "'/MSVCOUT'"
        && collectedTokens[292].Type == (int)TokenType.EndOfFile;
    if (!tokenSequenceIsValid)
    {
        throw new Exception("Token source nextToken method KO");
    }
}