/// <summary>
/// Tries to match <paramref name="nonTerminal"/> against the token buffer starting at
/// <paramref name="tokenStartOffset"/>, attempting each of the rule's sequences in order.
/// Returns a <see cref="ParseResult"/> carrying the successfully matched nodes
/// (<c>Nodes</c>), the best partial match (<c>BestCandidates</c>) and the terminal
/// counts for both.
/// </summary>
/// <param name="context">Shared parse state: token buffer, statement under construction, cancellation token, PL/SQL token index stack.</param>
/// <param name="nonTerminal">Grammar non-terminal whose sequences are tried.</param>
/// <param name="level">Recursion depth; stored on the nodes created here.</param>
/// <param name="tokenStartOffset">Index into the token buffer where matching starts.</param>
/// <param name="tokenReverted">True when this call is a retry after an optional token was reverted; disables the best-candidate retry path below.</param>
/// <param name="scope">Reserved-word scope inherited from the caller unless the target rule overrides it.</param>
private static ParseResult ProceedNonTerminal(ParseContext context, SqlGrammarRuleSequenceNonTerminal nonTerminal, int level, int tokenStartOffset, bool tokenReverted, ReservedWordScope scope)
{
	// A rule may override the inherited reserved-word scope.
	if (nonTerminal.TargetRule.Scope != ReservedWordScope.Inherit)
	{
		scope = nonTerminal.TargetRule.Scope;
	}

	var bestCandidateNodes = new List <StatementGrammarNode>();
	var workingNodes = new List <StatementGrammarNode>();
	var nonTerminalId = nonTerminal.Id;

	// Nodes/BestCandidates alias the local lists; both are (re)assigned before returning.
	var result =
		new ParseResult
		{
			NodeId = nonTerminalId,
			Nodes = workingNodes,
			BestCandidates = bestCandidateNodes,
		};

	var workingTerminalCount = 0;
	var bestCandidateTerminalCount = 0;
	var totalTokenCount = context.TokenBuffer.Count;

	// PL/SQL statement rules record their starting token index on a stack
	// (pushed here, popped before returning).
	var isPlSqlStatement = String.Equals(nonTerminalId, OracleGrammarDescription.NonTerminals.PlSqlStatementType);
	if (isPlSqlStatement)
	{
		context.PlSqlStatementTokenIndex.Push(tokenStartOffset);
	}

	foreach (var sequence in nonTerminal.TargetRule.Sequences)
	{
		context.CancellationToken.ThrowIfCancellationRequested();

		// Each candidate sequence starts from a clean slate.
		result.Status = ParseStatus.Success;
		workingNodes.Clear();
		workingTerminalCount = 0;
		var bestCandidatesCompatible = false;
		var isSequenceValid = true;

		foreach (ISqlGrammarRuleSequenceItem item in sequence.Items)
		{
			var tokenOffset = tokenStartOffset + workingTerminalCount;
			var isNodeRequired = item.IsRequired;
			// Past the end of the buffer an optional item is simply skipped.
			if (tokenOffset >= totalTokenCount && !isNodeRequired)
			{
				continue;
			}

			var childNodeId = item.Id;
			// Skip an optional self-reference before any token was consumed
			// (presumably prevents unbounded self-recursion — TODO confirm).
			if (!isNodeRequired && workingTerminalCount == 0 && String.Equals(childNodeId, nonTerminalId))
			{
				continue;
			}

			var bestCandidateOffset = tokenStartOffset + bestCandidateTerminalCount;
			// Retry from the best-candidate offset only when the candidates are ahead of
			// the working nodes and this call is not itself a revert retry.
			var tryBestCandidates = bestCandidatesCompatible && !tokenReverted && bestCandidateTerminalCount > workingTerminalCount;

			if (item is SqlGrammarRuleSequenceNonTerminal childNonTerminal)
			{
				var nestedResult = ProceedNonTerminal(context, childNonTerminal, level + 1, tokenOffset, false, scope);

				// If reverting trailing optional token(s) lets the child match, the reverted
				// terminal count is subtracted from the working count.
				var optionalTokenReverted = TryRevertOptionalToken(optionalTerminalCount => ProceedNonTerminal(context, childNonTerminal, level + 1, tokenOffset - optionalTerminalCount, true, scope), ref nestedResult, workingNodes);
				workingTerminalCount -= optionalTokenReverted;

				// Invalid-grammar recovery: may replace nestedResult and adjust the working
				// collections / terminal count through the ref parameters.
				TryParseInvalidGrammar(tryBestCandidates, () => ProceedNonTerminal(context, childNonTerminal, level + 1, bestCandidateOffset, false, scope), ref nestedResult, workingNodes, bestCandidateNodes, ref workingTerminalCount);

				var isNestedNodeValid = nestedResult.Status == ParseStatus.Success;
				// An optional child that failed must not downgrade the sequence status.
				if (isNodeRequired || isNestedNodeValid)
				{
					result.Status = nestedResult.Status;
				}

				var nestedNode =
					new StatementGrammarNode(NodeType.NonTerminal, context.Statement, null)
					{
						Id = childNodeId,
						Level = level,
						IsRequired = isNodeRequired,
						IsGrammarValid = isNestedNodeValid
					};

				// alternativeNode receives the child's best candidates (instead of its valid
				// nodes) when the candidates cover at least as many terminals.
				var alternativeNode = nestedNode.Clone();

				int currentTerminalCount;
				if (nestedResult.BestCandidates.Count > 0 && ((currentTerminalCount = workingTerminalCount + nestedResult.BestCandidateTerminalCount) > bestCandidateTerminalCount || (currentTerminalCount == bestCandidateTerminalCount && isNestedNodeValid)))
				{
					alternativeNode.AddChildNodes(ResolveAlternativeNodes(nestedResult));

					// Re-seed the candidate list from the working nodes when the two have
					// diverged (token reverted, invalid child, or differing node counts).
					if (optionalTokenReverted > 0 || !isNestedNodeValid || workingNodes.Count != bestCandidateNodes.Count)
					{
						bestCandidateTerminalCount = CreateNewNodeList(workingNodes, bestCandidateNodes);
					}

					bestCandidateNodes.Add(alternativeNode);
					bestCandidateTerminalCount += alternativeNode.TerminalCount;
					bestCandidatesCompatible = true;
				}

				if (isNestedNodeValid && nestedResult.Nodes.Count > 0)
				{
					nestedNode.AddChildNodes(nestedResult.Nodes);
					workingNodes.Add(nestedNode);
					workingTerminalCount += nestedResult.TerminalCount;
				}

				if (result.Status == ParseStatus.SequenceNotFound)
				{
					// Nothing matched at all -> abandon this sequence; otherwise keep the
					// alternative node and continue with the sequence marked invalid.
					if (workingNodes.Count == 0)
					{
						break;
					}

					isSequenceValid = false;
					workingNodes.Add(alternativeNode.Clone());
					workingTerminalCount += alternativeNode.TerminalCount;
				}
			}
			else
			{
				var terminalReference = (SqlGrammarRuleSequenceTerminal)item;

				var terminalResult = IsTokenValid(context, terminalReference, level, tokenOffset, scope);

				// For required terminals, additionally try matching from the best-candidate offset.
				TryParseInvalidGrammar(tryBestCandidates && isNodeRequired, () => IsTokenValid(context, terminalReference, level, bestCandidateOffset, scope), ref terminalResult, workingNodes, bestCandidateNodes, ref workingTerminalCount);

				if (terminalResult.Status == ParseStatus.SequenceNotFound)
				{
					if (isNodeRequired)
					{
						result.Status = ParseStatus.SequenceNotFound;
						break;
					}

					// Optional terminal missing -> move on to the next item.
					continue;
				}

				workingTerminalCount++;
				bestCandidateTerminalCount++;

				var terminalNode = terminalResult.Nodes[0];
				workingNodes.Add(terminalNode);
				bestCandidateNodes.Add(terminalNode.Clone());
			}
		}

		if (result.Status == ParseStatus.Success)
		{
			#region CASE WHEN issue
			// Workaround (per region name) for a CASE WHEN ambiguity: if the best candidates
			// consumed more terminals than the working nodes, and the candidates' leading
			// terminals are all non-identifier/non-literal while the working nodes start with
			// an identifier/alias, mark the working nodes grammatically invalid.
			if (bestCandidateTerminalCount > workingTerminalCount)
			{
				var currentTerminalCount = bestCandidateNodes.SelectMany(n => n.Terminals).TakeWhile(t => !t.Id.IsIdentifierOrAlias() && !t.Id.IsLiteral()).Count();
				if (currentTerminalCount > workingTerminalCount && workingNodes.FirstOrDefault()?.FirstTerminalNode.Id.IsIdentifierOrAlias() == true)
				{
					workingNodes.ForEach(n => n.IsGrammarValid = false);
				}
			}
			#endregion

			// A fully valid sequence wins; do not try the remaining sequences.
			if (isSequenceValid)
			{
				break;
			}
		}
	}

	if (isPlSqlStatement)
	{
		context.PlSqlStatementTokenIndex.Pop();
	}

	result.BestCandidates = bestCandidateNodes;
	result.TerminalCount = workingTerminalCount;
	result.BestCandidateTerminalCount = bestCandidateTerminalCount;

	return(result);
}
/// <summary>
/// Tries to match <paramref name="nonTerminal"/> against the token buffer starting at
/// <paramref name="tokenStartOffset"/>, attempting each of the rule's sequences in order.
/// Returns a <see cref="ParseResult"/> carrying the successfully matched nodes
/// (<c>Nodes</c>), the best partial match (<c>BestCandidates</c>) and the terminal
/// counts for both.
/// </summary>
/// <param name="context">Shared parse state: token buffer, statement under construction, cancellation token, PL/SQL token index stack.</param>
/// <param name="nonTerminal">Grammar non-terminal whose sequences are tried.</param>
/// <param name="level">Recursion depth; stored on the nodes created here.</param>
/// <param name="tokenStartOffset">Index into the token buffer where matching starts.</param>
/// <param name="tokenReverted">True when this call is a retry after an optional token was reverted; disables the best-candidate retry path below.</param>
/// <param name="scope">Reserved-word scope inherited from the caller unless the target rule overrides it.</param>
private static ParseResult ProceedNonTerminal(ParseContext context, SqlGrammarRuleSequenceNonTerminal nonTerminal, int level, int tokenStartOffset, bool tokenReverted, ReservedWordScope scope)
{
	// A rule may override the inherited reserved-word scope.
	if (nonTerminal.TargetRule.Scope != ReservedWordScope.Inherit)
	{
		scope = nonTerminal.TargetRule.Scope;
	}

	var bestCandidateNodes = new List<StatementGrammarNode>();
	var workingNodes = new List<StatementGrammarNode>();
	var nonTerminalId = nonTerminal.Id;

	// Nodes/BestCandidates alias the local lists; both are (re)assigned before returning.
	var result =
		new ParseResult
		{
			NodeId = nonTerminalId,
			Nodes = workingNodes,
			BestCandidates = bestCandidateNodes,
		};

	var workingTerminalCount = 0;
	var bestCandidateTerminalCount = 0;

	// PL/SQL statement rules record their starting token index on a stack
	// (pushed here, popped before returning).
	var isPlSqlStatement = String.Equals(nonTerminalId, OracleGrammarDescription.NonTerminals.PlSqlStatementType);
	if (isPlSqlStatement)
	{
		context.PlSqlStatementTokenIndex.Push(tokenStartOffset);
	}

	foreach (var sequence in nonTerminal.TargetRule.Sequences)
	{
		context.CancellationToken.ThrowIfCancellationRequested();

		// Each candidate sequence starts from a clean slate.
		result.Status = ParseStatus.Success;
		workingNodes.Clear();
		workingTerminalCount = 0;
		var bestCandidatesCompatible = false;
		var isSequenceValid = true;

		foreach (ISqlGrammarRuleSequenceItem item in sequence.Items)
		{
			var tokenOffset = tokenStartOffset + workingTerminalCount;
			var isNodeRequired = item.IsRequired;
			// Past the end of the buffer an optional item is simply skipped.
			if (tokenOffset >= context.TokenBuffer.Count && !isNodeRequired)
			{
				continue;
			}

			var childNodeId = item.Id;
			// Skip an optional self-reference before any token was consumed
			// (presumably prevents unbounded self-recursion — TODO confirm).
			if (!isNodeRequired && workingTerminalCount == 0 && String.Equals(childNodeId, nonTerminalId))
			{
				continue;
			}

			var bestCandidateOffset = tokenStartOffset + bestCandidateTerminalCount;
			// Retry from the best-candidate offset only when the candidates are ahead of
			// the working nodes and this call is not itself a revert retry.
			var tryBestCandidates = bestCandidatesCompatible && !tokenReverted && bestCandidateTerminalCount > workingTerminalCount;

			var childNonTerminal = item as SqlGrammarRuleSequenceNonTerminal;
			if (childNonTerminal != null)
			{
				var nestedResult = ProceedNonTerminal(context, childNonTerminal, level + 1, tokenOffset, false, scope);

				// If reverting trailing optional token(s) lets the child match, the reverted
				// terminal count is subtracted from the working count.
				var optionalTokenReverted = TryRevertOptionalToken(optionalTerminalCount => ProceedNonTerminal(context, childNonTerminal, level + 1, tokenOffset - optionalTerminalCount, true, scope), ref nestedResult, workingNodes);
				workingTerminalCount -= optionalTokenReverted;

				// Invalid-grammar recovery: may replace nestedResult and adjust the working
				// collections / terminal count through the ref parameters.
				TryParseInvalidGrammar(tryBestCandidates, () => ProceedNonTerminal(context, childNonTerminal, level + 1, bestCandidateOffset, false, scope), ref nestedResult, workingNodes, bestCandidateNodes, ref workingTerminalCount);

				var isNestedNodeValid = nestedResult.Status == ParseStatus.Success;
				// An optional child that failed must not downgrade the sequence status.
				if (isNodeRequired || isNestedNodeValid)
				{
					result.Status = nestedResult.Status;
				}

				var nestedNode =
					new StatementGrammarNode(NodeType.NonTerminal, context.Statement, null)
					{
						Id = childNodeId,
						Level = level,
						IsRequired = isNodeRequired,
						IsGrammarValid = isNestedNodeValid
					};

				// alternativeNode receives the child's best candidates (instead of its valid
				// nodes) when the candidates cover at least as many terminals.
				var alternativeNode = nestedNode.Clone();

				int currentTerminalCount;
				if (nestedResult.BestCandidates.Count > 0 && ((currentTerminalCount = workingTerminalCount + nestedResult.BestCandidateTerminalCount) > bestCandidateTerminalCount || (currentTerminalCount == bestCandidateTerminalCount && isNestedNodeValid)))
				{
					var bestCandidatePosition = new Dictionary<int, StatementGrammarNode>();

					// Candidate nodes can be multiplied, or terminals can be spread among
					// different non-terminals; therefore fetch the node with the most terminals,
					// or the later one when nodes contain the same terminals.
					foreach (var candidate in nestedResult.BestCandidates)
					{
						StatementGrammarNode storedNode;
						if (!bestCandidatePosition.TryGetValue(candidate.SourcePosition.IndexStart, out storedNode) || storedNode.SourcePosition.IndexEnd <= candidate.SourcePosition.IndexEnd)
						{
							bestCandidatePosition[candidate.SourcePosition.IndexStart] = candidate;
						}
					}

					alternativeNode.AddChildNodes(bestCandidatePosition.Values);

					// Re-seed the candidate list from the working nodes when the two have
					// diverged (token reverted, invalid child, or differing node counts).
					if (optionalTokenReverted > 0 || !isNestedNodeValid || workingNodes.Count != bestCandidateNodes.Count)
					{
						bestCandidateTerminalCount = CreateNewNodeList(workingNodes, bestCandidateNodes);
					}

					bestCandidateNodes.Add(alternativeNode);
					bestCandidateTerminalCount += alternativeNode.TerminalCount;
					bestCandidatesCompatible = true;
				}

				if (isNestedNodeValid && nestedResult.Nodes.Count > 0)
				{
					nestedNode.AddChildNodes(nestedResult.Nodes);
					workingNodes.Add(nestedNode);
					workingTerminalCount += nestedResult.TerminalCount;
				}

				if (result.Status == ParseStatus.SequenceNotFound)
				{
					// Nothing matched at all -> abandon this sequence; otherwise keep the
					// alternative node and continue with the sequence marked invalid.
					if (workingNodes.Count == 0)
					{
						break;
					}

					isSequenceValid = false;
					workingNodes.Add(alternativeNode.Clone());
					workingTerminalCount += alternativeNode.TerminalCount;
				}
			}
			else
			{
				var terminalReference = (SqlGrammarRuleSequenceTerminal)item;

				var terminalResult = IsTokenValid(context, terminalReference, level, tokenOffset, scope);

				// For required terminals, additionally try matching from the best-candidate offset.
				TryParseInvalidGrammar(tryBestCandidates && isNodeRequired, () => IsTokenValid(context, terminalReference, level, bestCandidateOffset, scope), ref terminalResult, workingNodes, bestCandidateNodes, ref workingTerminalCount);

				if (terminalResult.Status == ParseStatus.SequenceNotFound)
				{
					if (isNodeRequired)
					{
						result.Status = ParseStatus.SequenceNotFound;
						break;
					}

					// Optional terminal missing -> move on to the next item.
					continue;
				}

				workingTerminalCount++;
				bestCandidateTerminalCount++;

				var terminalNode = terminalResult.Nodes[0];
				workingNodes.Add(terminalNode);
				bestCandidateNodes.Add(terminalNode.Clone());
			}
		}

		if (result.Status == ParseStatus.Success)
		{
			#region CASE WHEN issue
			// Workaround (per region name) for a CASE WHEN ambiguity: if the best candidates
			// consumed more terminals than the working nodes, and the candidates' leading
			// terminals are all non-identifier/non-literal while the working nodes start with
			// an identifier/alias, mark the working nodes grammatically invalid.
			if (bestCandidateTerminalCount > workingTerminalCount)
			{
				var currentTerminalCount = bestCandidateNodes.SelectMany(n => n.Terminals).TakeWhile(t => !t.Id.IsIdentifierOrAlias() && !t.Id.IsLiteral()).Count();
				if (currentTerminalCount > workingTerminalCount && workingNodes.FirstOrDefault()?.FirstTerminalNode.Id.IsIdentifierOrAlias() == true)
				{
					workingNodes.ForEach(n => n.IsGrammarValid = false);
				}
			}
			#endregion

			// A fully valid sequence wins; do not try the remaining sequences.
			if (isSequenceValid)
			{
				break;
			}
		}
	}

	if (isPlSqlStatement)
	{
		context.PlSqlStatementTokenIndex.Pop();
	}

	result.BestCandidates = bestCandidateNodes;
	result.TerminalCount = workingTerminalCount;
	result.BestCandidateTerminalCount = bestCandidateTerminalCount;

	return result;
}
/// <summary>
/// Splits the token stream into statements and parses each one: comments are set aside,
/// then every available root non-terminal is tried against the remaining token buffer
/// until the buffer is drained. Failed regions are skipped up to the next statement
/// terminator so parsing can resume.
/// </summary>
/// <param name="tokens">Lexed tokens, including comment tokens.</param>
/// <param name="cancellationToken">Propagated into the recursive parse via the parse context.</param>
/// <returns>A statement collection with all tokens and comment nodes attached.</returns>
private static StatementCollection ProceedGrammar(IEnumerable <OracleToken> tokens, CancellationToken cancellationToken)
{
	var allTokens = new List <IToken>();
	var tokenBuffer = new List <OracleToken>();
	var commentBuffer = new List <OracleToken>();
	// Partition the stream: comment tokens are kept aside, everything else is parsed.
	foreach (var token in tokens)
	{
		if (token.CommentType == CommentType.None)
		{
			tokenBuffer.Add(token);
		}
		else
		{
			commentBuffer.Add(token);
		}

		allTokens.Add(token);
	}

	var oracleSqlCollection = new List <StatementBase>();

	// Comment-only (or empty) input: no statements to parse.
	if (tokenBuffer.Count == 0)
	{
		return(new OracleStatementCollection(oracleSqlCollection, allTokens, commentBuffer.Select(c => new StatementCommentNode(null, c))));
	}

	do
	{
		var result = new ParseResult();
		var context =
			new ParseContext
			{
				CancellationToken = cancellationToken,
				Statement = new OracleStatement(),
				TokenBuffer = tokenBuffer
			};

		foreach (var nonTerminal in AvailableNonTerminals)
		{
			var newResult = ProceedNonTerminal(context, nonTerminal, 1, 0, false, nonTerminal.TargetRule.Scope);

			//if (newResult.Nodes.SelectMany(n => n.AllChildNodes).Any(n => n.Terminals.Count() != n.TerminalCount))
			//	throw new ApplicationException("StatementGrammarNode TerminalCount value is invalid. ");

			if (newResult.Status != ParseStatus.Success)
			{
				// Among failed attempts keep the one whose best candidates cover the most
				// terminals (the initial result has null BestCandidates).
				if (result.BestCandidates == null || newResult.BestCandidates.Sum(n => n.TerminalCount) > result.BestCandidates.Sum(n => n.TerminalCount))
				{
					result = newResult;
				}

				continue;
			}

			result = newResult;

			// A successful parse must end at a terminator or consume the whole buffer;
			// otherwise record the first unparsed token and downgrade the status.
			var lastTerminal = result.Nodes[result.Nodes.Count - 1].LastTerminalNode;
			if (lastTerminal == null || !TerminatorIds.Contains(lastTerminal.Id) && tokenBuffer.Count > result.Nodes.Sum(n => n.TerminalCount))
			{
				if (lastTerminal != null)
				{
					var lastToken = result.BestCandidates.Last().LastTerminalNode.Token;
					var parsedTerminalCount = result.BestCandidates.Sum(n => n.TerminalCount);
					context.Statement.FirstUnparsedToken = tokenBuffer.Count > parsedTerminalCount ? tokenBuffer[parsedTerminalCount] : lastToken;
				}

				result.Status = ParseStatus.SequenceNotFound;
			}

			break;
		}

		int indexStart;
		int indexEnd;
		if (result.Status != ParseStatus.Success)
		{
			// Prefer the best candidates when they cover more terminals than the valid nodes.
			if (result.BestCandidates.Sum(n => n.TerminalCount) > result.Nodes.Sum(n => n.TerminalCount))
			{
				result.Nodes = result.BestCandidates;
			}

			indexStart = tokenBuffer.First().Index;

			// Skip the failed region: drop tokens up to and including the next statement
			// terminator, or drain the buffer when there is none.
			var index = tokenBuffer.FindIndex(t => TerminatorValues.Contains(t.Value));
			if (index == -1)
			{
				var lastToken = tokenBuffer[tokenBuffer.Count - 1];
				indexEnd = lastToken.Index + lastToken.Value.Length - 1;
				tokenBuffer.Clear();
			}
			else
			{
				indexEnd = tokenBuffer[index].Index;
				tokenBuffer.RemoveRange(0, index + 1);
			}
		}
		else
		{
			var lastTerminal = result.Nodes[result.Nodes.Count - 1].LastTerminalNode.Token;
			indexStart = result.Nodes[0].FirstTerminalNode.Token.Index;
			indexEnd = lastTerminal.Index + lastTerminal.Value.Length - 1;
			// Consume the successfully parsed tokens from the buffer.
			tokenBuffer.RemoveRange(0, result.Nodes.Sum(n => n.TerminalCount));

			// A structurally successful parse may still contain invalid-grammar nodes.
			var hasInvalidGrammarNodes = result.Nodes.Any(HasInvalidGrammarNodes);
			if (hasInvalidGrammarNodes)
			{
				result.Status = ParseStatus.SequenceNotFound;
			}
		}

		// A trailing terminator node is detached from the statement body and stored separately.
		var lastNode = result.Nodes.LastOrDefault();
		if (lastNode?.FirstTerminalNode != null && TerminatorIds.Contains(lastNode.FirstTerminalNode.Id))
		{
			context.Statement.TerminatorNode = lastNode.FirstTerminalNode;
			result.Nodes.Remove(lastNode);
		}

		context.Statement.SourcePosition = SourcePosition.Create(indexStart, indexEnd);
		var rootNode =
			new StatementGrammarNode(NodeType.NonTerminal, context.Statement, null)
			{
				Id = result.NodeId,
				IsGrammarValid = result.Nodes.All(n => n.IsGrammarValid),
				IsRequired = true,
			};
		rootNode.AddChildNodes(result.Nodes);

		context.Statement.RootNode = rootNode;
		context.Statement.ParseStatus = result.Status;

		oracleSqlCollection.Add(context.Statement);
	}
	while (tokenBuffer.Count > 0);

	var commentNodes = AddCommentNodes(oracleSqlCollection, commentBuffer);

	return(new OracleStatementCollection(oracleSqlCollection, allTokens, commentNodes));
}
/// <summary>
/// Splits the token stream into statements and parses each one: comments are set aside,
/// then every available root non-terminal is tried against the remaining token buffer
/// until the buffer is drained. Failed regions are skipped up to the next statement
/// terminator so parsing can resume.
/// </summary>
/// <param name="tokens">Lexed tokens, including comment tokens.</param>
/// <param name="cancellationToken">Propagated into the recursive parse via the parse context.</param>
/// <returns>A statement collection with all tokens and comment nodes attached.</returns>
private static StatementCollection ProceedGrammar(IEnumerable<OracleToken> tokens, CancellationToken cancellationToken)
{
	var allTokens = new List<IToken>();
	var tokenBuffer = new List<OracleToken>();
	var commentBuffer = new List<OracleToken>();
	// Partition the stream: comment tokens are kept aside, everything else is parsed.
	foreach (var token in tokens)
	{
		if (token.CommentType == CommentType.None)
		{
			tokenBuffer.Add(token);
		}
		else
		{
			commentBuffer.Add(token);
		}

		allTokens.Add(token);
	}

	var oracleSqlCollection = new List<StatementBase>();

	// Comment-only (or empty) input: no statements to parse.
	if (tokenBuffer.Count == 0)
	{
		return new OracleStatementCollection(oracleSqlCollection, allTokens, commentBuffer.Select(c => new StatementCommentNode(null, c)));
	}

	do
	{
		var result = new ParseResult();
		var context =
			new ParseContext
			{
				CancellationToken = cancellationToken,
				Statement = new OracleStatement(),
				TokenBuffer = tokenBuffer
			};

		foreach (var nonTerminal in AvailableNonTerminals)
		{
			var newResult = ProceedNonTerminal(context, nonTerminal, 1, 0, false, nonTerminal.TargetRule.Scope);

			//if (newResult.Nodes.SelectMany(n => n.AllChildNodes).Any(n => n.Terminals.Count() != n.TerminalCount))
			//	throw new ApplicationException("StatementGrammarNode TerminalCount value is invalid. ");

			if (newResult.Status != ParseStatus.Success)
			{
				// Among failed attempts keep the one whose best candidates cover the most
				// terminals (the initial result has null BestCandidates).
				if (result.BestCandidates == null || newResult.BestCandidates.Sum(n => n.TerminalCount) > result.BestCandidates.Sum(n => n.TerminalCount))
				{
					result = newResult;
				}

				continue;
			}

			result = newResult;

			// A successful parse must end at a terminator or consume the whole buffer;
			// otherwise record the first unparsed token and downgrade the status.
			var lastTerminal = result.Nodes[result.Nodes.Count - 1].LastTerminalNode;
			if (lastTerminal == null || !TerminatorIds.Contains(lastTerminal.Id) && tokenBuffer.Count > result.Nodes.Sum(n => n.TerminalCount))
			{
				if (lastTerminal != null)
				{
					var lastToken = result.BestCandidates.Last().LastTerminalNode.Token;
					var parsedTerminalCount = result.BestCandidates.Sum(n => n.TerminalCount);
					context.Statement.FirstUnparsedToken = tokenBuffer.Count > parsedTerminalCount ? tokenBuffer[parsedTerminalCount] : lastToken;
				}

				result.Status = ParseStatus.SequenceNotFound;
			}

			break;
		}

		int indexStart;
		int indexEnd;
		if (result.Status != ParseStatus.Success)
		{
			// Prefer the best candidates when they cover more terminals than the valid nodes.
			if (result.BestCandidates.Sum(n => n.TerminalCount) > result.Nodes.Sum(n => n.TerminalCount))
			{
				result.Nodes = result.BestCandidates;
			}

			indexStart = tokenBuffer.First().Index;

			// Skip the failed region: drop tokens up to and including the next statement
			// terminator, or drain the buffer when there is none.
			var index = tokenBuffer.FindIndex(t => TerminatorValues.Contains(t.Value));
			if (index == -1)
			{
				var lastToken = tokenBuffer[tokenBuffer.Count - 1];
				indexEnd = lastToken.Index + lastToken.Value.Length - 1;
				tokenBuffer.Clear();
			}
			else
			{
				indexEnd = tokenBuffer[index].Index;
				tokenBuffer.RemoveRange(0, index + 1);
			}
		}
		else
		{
			var lastTerminal = result.Nodes[result.Nodes.Count - 1].LastTerminalNode.Token;
			indexStart = result.Nodes[0].FirstTerminalNode.Token.Index;
			indexEnd = lastTerminal.Index + lastTerminal.Value.Length - 1;
			// Consume the successfully parsed tokens from the buffer.
			tokenBuffer.RemoveRange(0, result.Nodes.Sum(n => n.TerminalCount));

			// A structurally successful parse may still contain invalid-grammar nodes.
			var hasInvalidGrammarNodes = result.Nodes.Any(HasInvalidGrammarNodes);
			if (hasInvalidGrammarNodes)
			{
				result.Status = ParseStatus.SequenceNotFound;
			}
		}

		// A trailing terminator node is detached from the statement body and stored separately.
		var lastNode = result.Nodes.LastOrDefault();
		if (lastNode?.FirstTerminalNode != null && TerminatorIds.Contains(lastNode.FirstTerminalNode.Id))
		{
			context.Statement.TerminatorNode = lastNode.FirstTerminalNode;
			result.Nodes.Remove(lastNode);
		}

		context.Statement.SourcePosition = SourcePosition.Create(indexStart, indexEnd);
		var rootNode =
			new StatementGrammarNode(NodeType.NonTerminal, context.Statement, null)
			{
				Id = result.NodeId,
				IsGrammarValid = result.Nodes.All(n => n.IsGrammarValid),
				IsRequired = true,
			};
		rootNode.AddChildNodes(result.Nodes);

		context.Statement.RootNode = rootNode;
		context.Statement.ParseStatus = result.Status;

		oracleSqlCollection.Add(context.Statement);
	} while (tokenBuffer.Count > 0);

	var commentNodes = AddCommentNodes(oracleSqlCollection, commentBuffer);

	return new OracleStatementCollection(oracleSqlCollection, allTokens, commentNodes);
}