findShiftReducePattern(IEnumerable <SYMBOL_ENUM> shiftLhs, SYMBOL_ENUM reduceLhs, IEnumerable <SYMBOL_ENUM> readSymbols, SymbolChunk <SYMBOL_ENUM> input, Action <string> errorReporter) { HashSet <SYMBOL_ENUM> shift_set = new HashSet <SYMBOL_ENUM>(shiftLhs); List <SymbolPrecedence <SYMBOL_ENUM> > result; if (patterns.TryGetValue(input, out result)) { IEnumerable <SymbolPrecedence <SYMBOL_ENUM> > rs_pattern = result.Where(prec => prec.Mode == SymbolPrecedence.ModeEnum.ShiftReduceConflict && prec.ShiftProductions.IsSupersetOf(shift_set) && prec.ReduceProductions.Contains(reduceLhs) && (!prec.StackOperators.Any() || readSymbols.Any(sym => prec.StackOperators.Contains(sym)))) .ToArray(); // [@SELECT] // todo: there is some room for improvement, if the associativy is the same, and the symbols are the same // we could return the last precedence (last in sense of read symbols) or one with highest priority if (rs_pattern.Count() > 1) { errorReporter("Precedence rule overlapping over stack symbols: " + rs_pattern.Select(it => it.StackOperators.Intersect(readSymbols)).Flatten().Distinct() .Select(it => symbolsRep.Get(it)).Join(",")); } return(rs_pattern.LastOrDefault()); } else { return(null); } }
// Renders the cover sets as text: a fixed header followed by one entry per key of `covers`,
// each written as "<symbol> = <covered chunk set>" (the chunk set is printed with verboseMode off).
public string ToString(StringRep<SYMBOL_ENUM> symbolsRep)
{
    string header = "COVERAGE INFO:" + Environment.NewLine
        + "--------------" + Environment.NewLine
        + "LHS symbol = symbols that are covered" + Environment.NewLine
        + Environment.NewLine;

    var entries = covers.Keys.Select(lhs =>
    {
        string symbolName = symbolsRep.Get(lhs);
        return symbolName + " = " + coverAsChunkSet(lhs).ToString(symbolsRep, verboseMode: false);
    });

    return header + entries.Join(Environment.NewLine);
}
// Describes the state transition as text: just the input state when the output state is equal
// and there are no state actions, otherwise "in -> action -> ... -> out".
public string StateTransStr(StringRep<STATE_ENUM> statesRep)
{
    string origin = statesRep.Get(StateIn);

    // same state on both ends and nothing in between -- there is no transition to show
    if (StateIn.Equals(StateOut) && !stateActions.Any())
    {
        return origin;
    }

    // every action is followed by its own arrow, so the chain plugs directly into the final state
    string actionChain = String.Join("", stateActions.Select(action => action + " -> "));
    return origin + " -> " + actionChain + statesRep.Get(StateOut);
}
// Builds a multi-line description of this match: the state transition, the matched text (or EOF)
// with its position, the rule that matched, and every token together with its assigned value.
public string ToString(StringRep<SYMBOL_ENUM> symbolsRep, StringRep<STATE_ENUM> statesRep)
{
    string report = "states: " + StateTransStr(statesRep);
    report += Environment.NewLine;

    // matched text, or EOF when there is no text
    if (Text != null)
    {
        report += "text " + Position.XYString() + ": " + Text.PrintableString() + Environment.NewLine;
    }
    else
    {
        report += "EOF " + Position.XYString() + Environment.NewLine;
    }

    // the rule behind the match; an EOF rule produces no line at all
    if (Rule == null)
    {
        report += "UNRECOGNIZED TEXT" + Environment.NewLine;
    }
    else if (!Rule.IsEofRule)
    {
        report += "rule [" + Rule.PatternId + "]: " + Rule.ToString(statesRep) + Environment.NewLine;
    }

    // more than one token: wrap the entries in braces and indent them
    bool isGroup = tokens.Count > 1;
    string indent = isGroup ? " " : "";
    if (isGroup)
    {
        report += "multiple tokens {" + Environment.NewLine;
    }

    foreach (TokenMatch<SYMBOL_ENUM> token in Tokens)
    {
        string entry = indent + "token [" + token.ID + "]: ";

        if (token.HasToken)
        {
            entry += symbolsRep.Get(token.Token) + Environment.NewLine;

            // without a rule the value is printed as is, otherwise in its printable string form
            var shownValue = token.Value == null
                ? "null"
                : (Rule == null ? token.Value : token.Value.ToString().PrintableString());
            entry += indent + "value assigned: " + shownValue;
        }
        else
        {
            entry += "*none*";
        }

        report += entry + Environment.NewLine;
    }

    if (isGroup)
    {
        report += "}" + Environment.NewLine;
    }

    return report;
}
// Appends the textual form of this node's state to sbStates. When nfaStateIndices is null,
// it additionally appends one "from -- symbol --> to" entry per outgoing edge to sbEdges.
internal void BuildString(StringRep<SYMBOL_ENUM> symbolsRep, StringBuilder sbStates, StringBuilder sbEdges,
    IEnumerable<string> nfaStateIndices)
{
    sbStates.Append(State.ToString(nfaStateIndices, symbolsRep));

    // edges are written only when no NFA state indices were supplied
    if (nfaStateIndices != null)
    {
        return;
    }

    var edgeLines = EdgesTo.Select(edge =>
        State.Index + " -- " + symbolsRep.Get(edge.Key) + " --> " + edge.Value.State.Index);

    sbEdges.Append(edgeLines.Join(Environment.NewLine));
}
// Renders this item as "index) lhs := seen . unseen", optionally followed by the next lookaheads (n:),
// the after lookaheads (a:) and the items it originated from (c: closure parents, s: shift parents).
public string ToString(StringRep<SYMBOL_ENUM> symbolsRep)
{
    // lookaheads are lower-cased so a string can be searched more effectively in the DFA text file
    string nextText = NextLookaheads.ToString(symbolsRep, verboseMode: false).ToLower();
    string afterText = AfterLookaheads.ToString(symbolsRep, verboseMode: false).ToLower();

    // closure parents first, shift parents after them
    var origins = new List<string>(
        closureParents.Select(parent => "c:" + parent.IndexStr)
            .Concat(shiftParents.Select(parent => "s:" + parent.IndexStr)));

    string head = IndexStr + ") " + symbolsRep.Get(LhsSymbol) + " := ";

    // the dot separates the symbols already seen from those still expected
    string seen = String.Join(" ", Production.RhsSymbols.Take(RhsSeenCount).Select(sym => symbolsRep.Get(sym)));
    string unseen = String.Join(" ", Production.RhsSymbols.Skip(RhsSeenCount).Select(sym => symbolsRep.Get(sym)));

    string text = head + (seen + " . " + unseen).Trim();

    if (nextText.Length > 0)
    {
        text += "\t (n: " + nextText + " )";
    }

    if (afterText.Length > 0)
    {
        text += "\t (a: " + afterText + " )";
    }

    if (origins.Count > 0)
    {
        text += "\t <-- " + origins.Join(" ");
    }

    return text;
}
// When the action table holds an entry for the current input, hands it back through parseActions.
// Otherwise reports the error to the user handler and tries to recover from the syntax error:
// first via a recoverable element found on the stack, and if there is none, by skipping input
// one step at a time until an action is found.
private ActionRecoveryEnum getActionOrRecover(int nodeId,
    out IEnumerable<ParseAction<SYMBOL_ENUM, TREE_NODE>> parseActions,
    bool startWithRecovering)
{
    // result returned once an action is found; switches to Recovered after input had to be skipped
    ActionRecoveryEnum resultOnAction = ActionRecoveryEnum.Success;

    for (; ; )
    {
        // in normal run we ignore grammar conflicts, user should get conflicts just once, at validating stage
        parseActions = startWithRecovering ? null : actionTable.Get(nodeId, stackMaster.InputView);
        // the forced recovery applies to the first pass only
        startWithRecovering = false;

        if (parseActions != null)
        {
            // it could be success after naive recovery
            ++consecutiveCorrectActionsCount;
            return resultOnAction;
        }

        // no recovery attempt is made while the stack is forked
        if (stackMaster.IsForked)
        {
            return ActionRecoveryEnum.SyntaxError;
        }

        // the message is passed as a lambda, so it is built only if the handler asks for it
        bool keepParsing = callUserErrorHandler(stackMaster.InputHead,
            () => "No action defined at node " + nodeId
                  + " for input \"" + SymbolChunk.Create(stackMaster.InputTokens.Take(lookaheadWidth)).ToString(symbolsRep)
                  + "\" with stack \"" + String.Join(" ", stackMaster.Stack.TakeTail(historyHorizon)
                      .Select(it => symbolsRep.Get(it.Symbol))) + "\".");
        if (!keepParsing)
        {
            return ActionRecoveryEnum.StopParsing;
        }

        consecutiveCorrectActionsCount = 0;

        IEnumerable<NfaCell<SYMBOL_ENUM, TREE_NODE>> candidates;
        bool hasRecoveryPoint = stack.FindLastWhere(it => it.IsRecoverable, it => it.RecoveryItems, out candidates);

        if (!hasRecoveryPoint)
        {
            if (IsEndOfInput)
            {
                return ActionRecoveryEnum.SyntaxError;
            }

            if (options.Trace)
            {
                parseLog.Last.Value.Recovered = true;
            }

            // there is no recovery rule defined by the user so try to
            // "fix" the errors step by step
            stackMaster.AdvanceInput();
            // further success will be in fact the result of recovery
            resultOnAction = ActionRecoveryEnum.Recovered;
            continue;
        }

        if (options.Trace)
        {
            parseLog.Last.Value.Recovered = true;
        }

        // pick the minimal recovery item, i.e. the one which wastes the minimum of the input:
        // for each item count the tokens that come before its recovery terminal (or EOF);
        // First() and not Single(), because several items can tie, e.g. by hitting EOF
        NfaCell<SYMBOL_ENUM, TREE_NODE> cheapest = candidates
            .ArgMin(rec => stackMaster.Input
                .TakeWhile(it => !rec.MatchesRecoveryTerminal(it.Token) && !it.Token.Equals(EofSymbol))
                .Count())
            .First();

        parseActions = new[] { new ParseAction<SYMBOL_ENUM, TREE_NODE>(false, ReductionAction.Create(cheapest)) };

        stackMaster.AdvanceInputWhile(it => !cheapest.MatchesRecoveryTerminal(it.Token) && !it.Token.Equals(EofSymbol));

        // we hit the wall
        if (IsEndOfInput)
        {
            return ActionRecoveryEnum.SyntaxError;
        }

        // advance past the marker
        stackMaster.AdvanceInput();

        // setting stack as if we were the old recovery point
        // (sometimes we really are, because the last element on the stack can be recovery point)
        stackMaster.RemoveLastWhile(it => !it.IsRecoverable);
        return ActionRecoveryEnum.Recovered;
    }
}
// Produces one text entry per production in `productionsList`, written as "LHS := rhs1 rhs2 ...".
// The result is a deferred query -- the strings are built when the sequence is enumerated.
public IEnumerable<string> Report(StringRep<SYMBOL_ENUM> symbolsRep)
{
    return productionsList.Select(production =>
    {
        string lhs = symbolsRep.Get(production.LhsNonTerminal);
        string rhs = String.Join(" ", production.RhsSymbols.Select(symbol => symbolsRep.Get(symbol)));
        return lhs + " := " + rhs;
    });
}
// Checks the productions for structural problems. Violations are thrown as ArgumentException;
// the only exception are dead productions (not reachable from the start symbol), which are
// just reported through addWarning.
private void validate(Action<string> addWarning)
{
    bool hasNegativeSymbol = nonTerminals.Concat(terminals).Concat(SyntaxErrorSymbol)
        .Any(x => ((int)(object)x) < 0);
    if (hasNegativeSymbol)
    {
        throw new ArgumentException("All symbols have to have non-negative int representation.");
    }

    // do NOT remove this condition -- if you need multiple start productions, then add on-fly super start
    // production consisting of this start symbol; other code relies on the fact there is only single
    // start production, like DFA worker
    IEnumerable<Production<SYMBOL_ENUM, TREE_NODE>> startProductions =
        productionsList.Where(p => p.LhsNonTerminal.Equals(StartSymbol));
    if (startProductions.Count() != 1)
    {
        throw new ArgumentException("There should be exactly 1 productions with start symbol \""
            + SymbolsRep.Get(StartSymbol) + "\":" + Environment.NewLine
            + String.Join(Environment.NewLine, startProductions.Select(p => p.ToString())));
    }

    IEnumerable<Production<SYMBOL_ENUM, TREE_NODE>> startOnRhs =
        productionsList.Where(p => p.RhsSymbols.Contains(StartSymbol));
    if (startOnRhs.Any())
    {
        throw new ArgumentException("Start symbol \"" + SymbolsRep.Get(StartSymbol)
            + "\" cannot be used on right hand side of productions:" + Environment.NewLine
            + String.Join(Environment.NewLine, startOnRhs.Select(p => p.ToString())));
    }

    if (!productionsList.First().LhsNonTerminal.Equals(StartSymbol))
    {
        throw new ArgumentException(String.Format("Start symbol \"{0}\" should be in the first production.",
            SymbolsRep.Get(StartSymbol)));
    }

    if (productionsList.Any(p => p.LhsNonTerminal.Equals(EofSymbol)))
    {
        throw new ArgumentException("There cannot be production for EOF token.");
    }

    if (productionsList.Any(p => p.RhsSymbols.Any(s => s.Equals(EofSymbol))))
    {
        throw new ArgumentException("EOF token cannot be used explicitly in productions.");
    }

    if (productionsList.Any(p => p.LhsNonTerminal.Equals(SyntaxErrorSymbol)))
    {
        throw new ArgumentException("There cannot be production for syntax error token.");
    }

    // everything that is derived from S, with S included; the set is grown until a full pass adds nothing
    var reachable = new HashSet<SYMBOL_ENUM> { StartSymbol };
    bool grew;
    do
    {
        grew = false;
        // iterate over a snapshot, because the set is extended inside the loop
        foreach (SYMBOL_ENUM known in reachable.ToList())
        {
            foreach (SYMBOL_ENUM derived in FilterByLhs(known).Select(p => p.RhsSymbols).Flatten())
            {
                if (reachable.Add(derived))
                {
                    grew = true;
                }
            }
        }
    } while (grew);

    IEnumerable<SYMBOL_ENUM> deadNonTerminals = nonTerminals.Where(s => !reachable.Contains(s));
    if (deadNonTerminals.Any())
    {
        addWarning("Detected dead productions for symbol(s): "
            + String.Join(",", deadNonTerminals.Select(s => SymbolsRep.Get(s)))
            + " in productions:" + Environment.NewLine
            + String.Join(Environment.NewLine,
                deadNonTerminals.Select(s => FilterByLhs(s).Select(p => p.ToString())).Flatten()));
    }

    // non-terminals having a production whose whole right hand side is already in this set
    // (an empty right hand side qualifies right away); repeated until nothing new is added
    var canBeEmpty = new HashSet<SYMBOL_ENUM>();
    bool extended;
    do
    {
        extended = false;
        foreach (Production<SYMBOL_ENUM, TREE_NODE> candidate
            in productionsList.Where(p => !canBeEmpty.Contains(p.LhsNonTerminal)))
        {
            if (candidate.RhsSymbols.All(s => canBeEmpty.Contains(s)) && canBeEmpty.Add(candidate.LhsNonTerminal))
            {
                extended = true;
            }
        }
    } while (extended);

    // check the placement of error token in every error production
    foreach (Production<SYMBOL_ENUM, TREE_NODE> production in productionsList)
    {
        int errorCount = production.RhsSymbols.Where(s => s.Equals(SyntaxErrorSymbol)).Count();
        if (errorCount == 0)
        {
            continue;
        }

        if (errorCount > 1)
        {
            throw new ArgumentException("Only one syntax error token per production: " + production.PositionDescription);
        }

        int errorAt = production.RhsSymbols.IndexOf(SyntaxErrorSymbol);
        if (errorAt != production.RhsSymbols.Count - 2)
        {
            throw new ArgumentException("Syntax error token has to be next to last: " + production.PositionDescription);
        }

        // the symbol after the error token only has to be non-empty;
        // it is not required to be a terminal
        if (canBeEmpty.Contains(production.RhsSymbols[errorAt + 1]))
        {
            throw new ArgumentException("There has to be a terminal or non-empty non-terminal after syntax error token: "
                + production.PositionDescription);
        }
    }

    // checks if one non-terminal has more than 1 error recovery production
    // this check has 2 known weak points:
    // * obvious -- one production looks like a prefix of other in regard of error symbol
    //       a := C B Error D
    //       a := C Error D
    //   this is not checked (checking it is not that obvious)
    // * complex -- one production contains non-terminal which also has error symbol in it
    //       a := b Error C
    //       b := d Error X
    //   this is not detected, and it is unclear whether such grammar is useful or wrong

    // for every production with syntax error symbol take LHS + RHS up to the error symbol
    var recoveryPrefixes = new List<IEnumerable<SYMBOL_ENUM>>();
    foreach (Production<SYMBOL_ENUM, TREE_NODE> recoveryProd
        in productionsList.Where(p => p.RhsSymbols.Contains(SyntaxErrorSymbol)))
    {
        recoveryPrefixes.Add(recoveryProd.LhsNonTerminal
            .Concat(recoveryProd.RhsSymbols.TakeWhile(s => !s.Equals(SyntaxErrorSymbol)))
            .ToArray());
    }

    // group equal prefixes and keep the LHS names of those that occur more than once
    IEnumerable<string> duplicated = recoveryPrefixes
        .GroupBy(prefix => prefix, new SequenceEquality<SYMBOL_ENUM>())
        .Where(g => g.Count() > 1)
        .Select(g => SymbolsRep.Get(g.Key.First()));

    if (!ExperimentsSettings.NonRecursiveProductionsElimination && duplicated.Any())
    {
        throw new ArgumentException("Error -- multiple productions with recovery point for: "
            + duplicated.Select(name => "\"" + name + "\"").Join(", "));
    }
}
// Builds the action table for the given DFA: computes an action for every possible input chunk
// of every node, checks each node for recovery item conflicts, and passes the unused precedence
// entries to the report as warnings.
//
// precedenceTable may be null -- an empty table created from productions.SymbolsRep is used then.
// Returns the filled table (also stored in report.ActionTable), or null when the report
// contains grammar errors.
public ActionTable<SYMBOL_ENUM, TREE_NODE> FillActionTable(Productions<SYMBOL_ENUM, TREE_NODE> productions,
    FirstSets<SYMBOL_ENUM> firstSets,
    CoverSets<SYMBOL_ENUM> coverSets,
    HorizonSets<SYMBOL_ENUM> horizonSets,
    int lookaheadWidth,
    Dfa<SYMBOL_ENUM, TREE_NODE> dfa,
    PrecedenceTable<SYMBOL_ENUM> precedenceTable,
    GrammarReport<SYMBOL_ENUM, TREE_NODE> report)
{
    this.coverSets = coverSets;
    this.horizonSets = horizonSets;
    this.report = report;
    this.precedenceTable = precedenceTable ?? new PrecedenceTable<SYMBOL_ENUM>(productions.SymbolsRep);
    this.symbolsRep = productions.SymbolsRep;

    actionTable = new ActionTable<SYMBOL_ENUM, TREE_NODE>(dfa, productions, lookaheadWidth);

    foreach (Node<SYMBOL_ENUM, TREE_NODE> node in dfa.Nodes)
    {
        foreach (SymbolChunk<SYMBOL_ENUM> chunk in node.State.PossibleInputs)
        {
            // the action is computed in every case, but it is stored only as long as
            // the report has no grammar errors
            ParseAction<SYMBOL_ENUM, TREE_NODE> action_data = computeAction(node, chunk);

            if (!report.HasGrammarErrors)
            {
                actionTable.Add(node.State.Index, chunk, new[] { action_data });
            }
        }

        // checking recovery conflicts
        IEnumerable<SingleState<SYMBOL_ENUM, TREE_NODE>> recovery_items = node.State.ParsingActiveItems
            .Where(it => it.IsAtRecoveryPoint);

        // recovery items of this node, collected per first symbol of the chunks
        // in the first set of their recovery marker
        var recovery_stats = DynamicDictionary.CreateWithDefault<SYMBOL_ENUM, List<SingleState<SYMBOL_ENUM, TREE_NODE>>>();
        foreach (SingleState<SYMBOL_ENUM, TREE_NODE> rec_state in recovery_items)
        {
            foreach (SymbolChunk<SYMBOL_ENUM> first in firstSets[rec_state.RecoveryMarkerSymbol].Chunks)
            {
                recovery_stats[first.Symbols.First()].Add(rec_state);
            }
        }

        // more than one entry collected under the same symbol is reported as a conflict
        foreach (var pair in recovery_stats.Where(it => it.Value.Count > 1))
        {
            report.AddError(pair.Value.Select(it => it.IndexStr),
                "Recovery item conflict on \"" + symbolsRep.Get(pair.Key) + "\".");
        }
    }

    // use the field, not the parameter: the parameter is allowed to be null (see the fallback above),
    // and dereferencing it here would throw NullReferenceException
    report.AddWarnings(this.precedenceTable.GetUnusedEntries(symbolsRep));

    if (report.HasGrammarErrors)
    {
        return null;
    }

    report.ActionTable = actionTable;
    return actionTable;
}
// Renders the sets as text: a one-line legend, an empty line, then one entry per element of `sets`,
// each written as "<symbol> = <set>" (the set is printed with verboseMode off).
public string ToString(StringRep<SYMBOL_ENUM> symbolsRep)
{
    string legend = "Symbol = follow symbol <- production source ~ leaked via production"
        + Environment.NewLine
        + Environment.NewLine;

    var rows = sets.Select(entry =>
        symbolsRep.Get(entry.Key) + " = " + entry.Value.ToString(symbolsRep, verboseMode: false));

    return legend + rows.Join(Environment.NewLine);
}