findShiftReducePattern(IEnumerable <SYMBOL_ENUM> shiftLhs, SYMBOL_ENUM reduceLhs,
                               IEnumerable <SYMBOL_ENUM> readSymbols, SymbolChunk <SYMBOL_ENUM> input,
                               Action <string> errorReporter)
        {
            HashSet <SYMBOL_ENUM> shift_set = new HashSet <SYMBOL_ENUM>(shiftLhs);

            List <SymbolPrecedence <SYMBOL_ENUM> > result;

            if (patterns.TryGetValue(input, out result))
            {
                IEnumerable <SymbolPrecedence <SYMBOL_ENUM> > rs_pattern = result.Where(prec =>
                                                                                        prec.Mode == SymbolPrecedence.ModeEnum.ShiftReduceConflict &&
                                                                                        prec.ShiftProductions.IsSupersetOf(shift_set) &&
                                                                                        prec.ReduceProductions.Contains(reduceLhs) &&
                                                                                        (!prec.StackOperators.Any() || readSymbols.Any(sym => prec.StackOperators.Contains(sym))))
                                                                           .ToArray();

                // [@SELECT]
                // todo: there is some room for improvement -- if the associativity is the same and the symbols are the same,
                // we could return the last precedence (last in the sense of read symbols) or the one with the highest priority
                if (rs_pattern.Count() > 1)
                {
                    errorReporter("Precedence rule overlapping over stack symbols: " + rs_pattern.Select(it => it.StackOperators.Intersect(readSymbols)).Flatten().Distinct()
                                  .Select(it => symbolsRep.Get(it)).Join(","));
                }

                return(rs_pattern.LastOrDefault());
            }
            else
            {
                return(null);
            }
        }
 /// <summary>
 /// Renders the coverage table as text: a fixed three-line header, a blank line,
 /// and then one "symbol = covered chunks" line for every key of <c>covers</c>.
 /// </summary>
 /// <param name="symbolsRep">Translates symbols into their display names.</param>
 /// <returns>The header followed by the per-symbol lines, joined with <see cref="Environment.NewLine"/>.</returns>
 public string ToString(StringRep <SYMBOL_ENUM> symbolsRep)
 {
     string header = "COVERAGE INFO:" + Environment.NewLine
                     + "--------------" + Environment.NewLine
                     + "LHS symbol = symbols that are covered"
                     + Environment.NewLine + Environment.NewLine;

     // one line per covered LHS symbol, chunk sets printed in non-verbose form
     var rows = covers.Keys.Select(lhs => symbolsRep.Get(lhs) + " = " + coverAsChunkSet(lhs).ToString(symbolsRep, verboseMode: false));

     return(header + rows.Join(Environment.NewLine));
 }
            /// <summary>
            /// Describes the state transition as "in -> action -> ... -> out".
            /// When the in and out states are equal and there are no state actions,
            /// only the name of the in state is returned.
            /// </summary>
            /// <param name="statesRep">Translates states into their display names.</param>
            /// <returns>Textual form of the transition.</returns>
            public string StateTransStr(StringRep <STATE_ENUM> statesRep)
            {
                string origin = statesRep.Get(StateIn);

                // no visible transition -- the starting state alone is the whole story
                if (StateIn.Equals(StateOut) && !stateActions.Any())
                {
                    return(origin);
                }

                string action_chain = String.Join("", stateActions.Select(action => action + " -> "));

                return(origin + " -> " + action_chain + statesRep.Get(StateOut));
            }
            /// <summary>
            /// Builds a multi-line description of this match: the state transition,
            /// the matched text (or an EOF marker) with its position, the rule involved,
            /// and every token together with its assigned value.
            /// </summary>
            /// <param name="symbolsRep">Translates token symbols into their display names.</param>
            /// <param name="statesRep">Translates states into their display names.</param>
            /// <returns>The description; every section ends with <see cref="Environment.NewLine"/>.</returns>
            public string ToString(StringRep <SYMBOL_ENUM> symbolsRep, StringRep <STATE_ENUM> statesRep)
            {
                string output = "states: " + StateTransStr(statesRep) + Environment.NewLine;

                // a missing text is reported as EOF
                if (Text != null)
                {
                    output += "text " + Position.XYString() + ": " + Text.PrintableString() + Environment.NewLine;
                }
                else
                {
                    output += "EOF " + Position.XYString() + Environment.NewLine;
                }

                // the EOF rule is not printed at all
                if (Rule == null)
                {
                    output += "UNRECOGNIZED TEXT" + Environment.NewLine;
                }
                else if (!Rule.IsEofRule)
                {
                    output += "rule [" + Rule.PatternId + "]: " + Rule.ToString(statesRep) + Environment.NewLine;
                }

                // several tokens are wrapped in a braced, indented group
                bool grouped = tokens.Count > 1;
                string indent = grouped ? "  " : "";

                if (grouped)
                {
                    output += "multiple tokens {" + Environment.NewLine;
                }

                foreach (TokenMatch <SYMBOL_ENUM> token in Tokens)
                {
                    output += indent + "token [" + token.ID + "]: ";
                    if (token.HasToken)
                    {
                        output += symbolsRep.Get(token.Token) + Environment.NewLine;
                        // without a rule the raw value is printed, otherwise its printable string form
                        output += indent + "value assigned: " + (token.Value == null ? "null" : (Rule == null ? token.Value : token.Value.ToString().PrintableString()));
                    }
                    else
                    {
                        output += "*none*";
                    }
                    output += Environment.NewLine;
                }

                if (grouped)
                {
                    output += "}" + Environment.NewLine;
                }

                return(output);
            }
Exemple #5
0
        /// <summary>
        /// Appends the textual form of this node's state to <paramref name="sbStates"/> and,
        /// only when <paramref name="nfaStateIndices"/> is null, appends one
        /// "from -- symbol --> to" line per outgoing edge to <paramref name="sbEdges"/>.
        /// </summary>
        /// <param name="symbolsRep">Translates symbols into their display names.</param>
        /// <param name="sbStates">Receives the state description.</param>
        /// <param name="sbEdges">Receives the edge descriptions.</param>
        /// <param name="nfaStateIndices">Passed through to the state's ToString; a non-null value suppresses edge output.</param>
        internal void BuildString(StringRep <SYMBOL_ENUM> symbolsRep,
                                  StringBuilder sbStates,
                                  StringBuilder sbEdges,
                                  IEnumerable <string> nfaStateIndices)
        {
            sbStates.Append(State.ToString(nfaStateIndices, symbolsRep));

            if (nfaStateIndices != null)
            {
                return;
            }

            var edge_lines = EdgesTo.Select(edge => State.Index + " -- " + symbolsRep.Get(edge.Key) + " --> " + edge.Value.State.Index);

            sbEdges.Append(edge_lines.Join(Environment.NewLine));
        }
Exemple #6
0
        /// <summary>
        /// Renders this item as "index)  lhs := seen . unseen", optionally followed by
        /// the next lookaheads "(n: ...)", the after lookaheads "(a: ...)" and the
        /// source items "&lt;-- c:... s:..." (closure parents first, then shift parents).
        /// </summary>
        /// <param name="symbolsRep">Translates symbols into their display names.</param>
        /// <returns>Single-line description of the item.</returns>
        public string ToString(StringRep <SYMBOL_ENUM> symbolsRep)
        {
            // Lowering the case so we can search a string more effectively in DFA text file.
            // The invariant culture is used so the dump does not depend on the machine locale
            // (culture-sensitive lowering maps 'I' differently under e.g. Turkish settings).
            string next_lookaheads  = NextLookaheads.ToString(symbolsRep, verboseMode: false).ToLowerInvariant();
            string after_lookaheads = AfterLookaheads.ToString(symbolsRep, verboseMode: false).ToLowerInvariant();

            // AddRange with an empty sequence adds nothing, so no emptiness guards are needed
            var source = new List <string>();
            source.AddRange(closureParents.Select(it => "c:" + it.IndexStr));
            source.AddRange(shiftParents.Select(it => "s:" + it.IndexStr));

            // the dot separates the already seen RHS symbols from the remaining ones;
            // Trim removes the dangling space when either side is empty
            string dotted_rhs = (String.Join(" ", Production.RhsSymbols.Take(RhsSeenCount).Select(it => symbolsRep.Get(it)))
                                 + " . "
                                 + String.Join(" ", Production.RhsSymbols.Skip(RhsSeenCount).Select(it => symbolsRep.Get(it)))).Trim();

            return(IndexStr + ")  " + symbolsRep.Get(LhsSymbol) + " := "
                   + dotted_rhs
                   + (next_lookaheads.Length > 0 ? "\t (n: " + next_lookaheads + " )" : "")
                   + (after_lookaheads.Length > 0 ? "\t (a: " + after_lookaheads + " )" : "")
                   + (source.Any() ? "\t <-- " + source.Join(" ") : ""));
        }
Exemple #7
0
        // if we have clear action to do (shift/reduce) pass it forward
        // if not, here we try to recover from syntax error
        //
        // nodeId              -- node for which the action table is queried
        // parseActions        -- receives the actions found in the table, or a single action built
        //                        from the chosen recovery item; it can be left null on failure paths
        // startWithRecovering -- when true the table lookup is skipped in the first iteration,
        //                        so the method goes straight to error handling
        //
        // returns Success (or Recovered, if input had to be skipped first) when the table has an action,
        // Recovered when a recovery item was applied, SyntaxError when recovery is not possible,
        // StopParsing when the user error handler returns false
        private ActionRecoveryEnum getActionOrRecover(int nodeId,
                                                      out IEnumerable <ParseAction <SYMBOL_ENUM, TREE_NODE> > parseActions,
                                                      bool startWithRecovering)
        {
            // value returned when the table lookup finally succeeds; changed to Recovered
            // once at least one input element was skipped by the step-by-step recovery below
            ActionRecoveryEnum success_result = ActionRecoveryEnum.Success;

            while (true)
            {
                if (startWithRecovering)
                {
                    parseActions = null;
                }
                else
                {
                    // in normal run we ignore grammar conflicts, user should get conflicts just once, at validating stage
                    parseActions = actionTable.Get(nodeId, stackMaster.InputView);
                }

                // the forced recovery applies to the first iteration only
                startWithRecovering = false;

                if (parseActions != null)
                {
                    ++consecutiveCorrectActionsCount;
                    // it could be success after naive recovery
                    return(success_result);
                }



                // trying to recover from old recovery point
                // (no recovery is attempted while the stack is forked -- it is reported as syntax error right away)
                if (stackMaster.IsForked)
                {
                    return(ActionRecoveryEnum.SyntaxError);
                }
                else
                {
                    // make a lazy message
                    // (the text is passed as a lambda, so it is built only if the handler invokes it)
                    if (!callUserErrorHandler(stackMaster.InputHead, () => "No action defined at node " + nodeId
                                              + " for input \"" + SymbolChunk.Create(stackMaster.InputTokens.Take(lookaheadWidth)).ToString(symbolsRep) + "\" with stack \""
                                              + String.Join(" ", stackMaster.Stack.TakeTail(historyHorizon)
                                                            .Select(it => symbolsRep.Get(it.Symbol))) + "\"."))
                    {
                        return(ActionRecoveryEnum.StopParsing);
                    }

                    consecutiveCorrectActionsCount = 0;
                    IEnumerable <NfaCell <SYMBOL_ENUM, TREE_NODE> > recovery_items;

                    // looking for the last recoverable stack element and fetching its recovery items
                    if (stack.FindLastWhere(it => it.IsRecoverable, it => it.RecoveryItems, out recovery_items))
                    {
                        if (options.Trace)
                        {
                            parseLog.Last.Value.Recovered = true;
                        }

                        // we would like to get minimal recovery item
                        // i.e. the one which wastes the minimum of the input in order to recover
                        NfaCell <SYMBOL_ENUM, TREE_NODE> min_recovery_item = recovery_items
                                                                             .ArgMin(rec => stackMaster.Input
                                                                             // for each recovery item compute the count of required tokens from input
                                                                                     .TakeWhile(it => !rec.MatchesRecoveryTerminal(it.Token) && !it.Token.Equals(EofSymbol)).Count())
                                                                             // not single, because we could hit EOF in several cases
                                                                             .First();

                        parseActions = new[] { new ParseAction <SYMBOL_ENUM, TREE_NODE>(false, ReductionAction.Create(min_recovery_item)) };

                        // skipping the input up to the recovery terminal of the chosen item (or up to EOF)
                        stackMaster.AdvanceInputWhile(it => !min_recovery_item.MatchesRecoveryTerminal(it.Token) && !it.Token.Equals(EofSymbol));

                        // we hit the wall
                        if (IsEndOfInput)
                        {
                            return(ActionRecoveryEnum.SyntaxError);
                        }

                        stackMaster.AdvanceInput(); // advance past the marker

                        // setting stack as if we were the old recovery point
                        // (sometimes we really are, because the last element on the stack can be recovery point)
                        stackMaster.RemoveLastWhile(it => !it.IsRecoverable);

                        return(ActionRecoveryEnum.Recovered);
                    }
                    else if (IsEndOfInput)
                    {
                        // no recovery item and no input left to skip
                        return(ActionRecoveryEnum.SyntaxError);
                    }
                    else
                    {
                        if (options.Trace)
                        {
                            parseLog.Last.Value.Recovered = true;
                        }

                        // there is no recovery rule defined by the user so try to
                        // "fix" the errors step by step
                        // (one input element is dropped and the loop retries the table lookup)
                        stackMaster.AdvanceInput();
                        // further success will be in fact the result of recovery
                        success_result = ActionRecoveryEnum.Recovered;
                    }
                }
            }
        }
Exemple #8
0
 /// <summary>
 /// Lazily produces one line per production in the form "lhs := rhs1 rhs2 ...".
 /// </summary>
 /// <param name="symbolsRep">Translates symbols into their display names.</param>
 /// <returns>Deferred sequence of production descriptions, in the order of <c>productionsList</c>.</returns>
 public IEnumerable <string> Report(StringRep <SYMBOL_ENUM> symbolsRep)
 {
     return(from prod in productionsList
            select symbolsRep.Get(prod.LhsNonTerminal)
                   + " := "
                   + String.Join(" ", from sym in prod.RhsSymbols
                                      select symbolsRep.Get(sym)));
 }
Exemple #9
0
        // Checks the grammar for structural errors.
        //
        // Throws ArgumentException when:
        // * any symbol has negative int representation,
        // * the number of productions for the start symbol is not exactly 1,
        // * the start symbol is used on the right hand side of any production,
        // * the first production is not the one for the start symbol,
        // * EOF or syntax error symbol is used as LHS, or EOF is used in RHS,
        // * syntax error symbol is misplaced within a production,
        // * there are several recovery productions with the same LHS and the same prefix
        //   (unless ExperimentsSettings.NonRecursiveProductionsElimination is set).
        //
        // Productions not reachable from the start symbol are only reported through addWarning.
        //
        // Queries which are inspected more than once are materialized first, so the filters
        // run over the productions only once per check.
        private void validate(Action <string> addWarning)
        {
            if (nonTerminals.Concat(terminals).Concat(SyntaxErrorSymbol).Any(x => ((int)(object)x) < 0))
            {
                throw new ArgumentException("All symbols have to have non-negative int representation.");
            }

            {
                // do NOT remove this condition -- if you need multiple start productions, then add on-fly super start production consisting of this start symbol
                // other code relies on the fact there is only single start production, like DFA worker
                List <Production <SYMBOL_ENUM, TREE_NODE> > start_prod = productionsList.Where(it => it.LhsNonTerminal.Equals(StartSymbol)).ToList();
                if (start_prod.Count != 1)
                {
                    throw new ArgumentException("There should be exactly 1 productions with start symbol \"" + SymbolsRep.Get(StartSymbol) + "\":" + Environment.NewLine
                                                + String.Join(Environment.NewLine, start_prod.Select(it => it.ToString())));
                }
            }

            {
                List <Production <SYMBOL_ENUM, TREE_NODE> > prods_with_start = productionsList.Where(it => it.RhsSymbols.Contains(StartSymbol)).ToList();
                if (prods_with_start.Count > 0)
                {
                    throw new ArgumentException("Start symbol \"" + SymbolsRep.Get(StartSymbol) + "\" cannot be used on right hand side of productions:" + Environment.NewLine
                                                + String.Join(Environment.NewLine, prods_with_start.Select(it => it.ToString())));
                }
            }

            // the list cannot be empty here -- exactly one start production was found above
            if (!productionsList.First().LhsNonTerminal.Equals(StartSymbol))
            {
                throw new ArgumentException(String.Format("Start symbol \"{0}\" should be in the first production.", SymbolsRep.Get(StartSymbol)));
            }

            if (productionsList.Any(it => it.LhsNonTerminal.Equals(EofSymbol)))
            {
                throw new ArgumentException("There cannot be production for EOF token.");
            }

            if (productionsList.Any(prod => prod.RhsSymbols.Any(it => it.Equals(EofSymbol))))
            {
                throw new ArgumentException("EOF token cannot be used explicitly in productions.");
            }

            if (productionsList.Any(it => it.LhsNonTerminal.Equals(SyntaxErrorSymbol)))
            {
                throw new ArgumentException("There cannot be production for syntax error token.");
            }

            {
                // everything that is derived from S, with S included
                var reachable_symbols = new HashSet <SYMBOL_ENUM>();
                reachable_symbols.Add(StartSymbol);

                // fixed-point iteration: keep adding RHS symbols of already reachable symbols
                // until a full pass adds nothing new
                while (true)
                {
                    bool changed = false;
                    // iterating over a snapshot, because the set is modified inside the loop
                    foreach (SYMBOL_ENUM symbol in reachable_symbols.ToList())
                    {
                        foreach (SYMBOL_ENUM rhs_sym in FilterByLhs(symbol).Select(it => it.RhsSymbols).Flatten())
                        {
                            if (reachable_symbols.Add(rhs_sym))
                            {
                                changed = true;
                            }
                        }
                    }

                    if (!changed)
                    {
                        break;
                    }
                }

                List <SYMBOL_ENUM> dead_lhs = nonTerminals.Where(it => !reachable_symbols.Contains(it)).ToList();
                if (dead_lhs.Count > 0)
                {
                    addWarning("Detected dead productions for symbol(s): "
                               + String.Join(",", dead_lhs.Select(it => SymbolsRep.Get(it))) + " in productions:" + Environment.NewLine
                               + String.Join(Environment.NewLine, dead_lhs.Select(lhs => FilterByLhs(lhs).Select(prod => prod.ToString())).Flatten()));
                }
            }

            {
                // LHS symbols having a production whose RHS consists solely of symbols already
                // in this set (an empty RHS qualifies right away), computed as a fixed point
                var empties = new HashSet <SYMBOL_ENUM>();
                while (true)
                {
                    bool change = false;
                    foreach (Production <SYMBOL_ENUM, TREE_NODE> prod in productionsList.Where(it => !empties.Contains(it.LhsNonTerminal)))
                    {
                        if (prod.RhsSymbols.All(it => empties.Contains(it)))
                        {
                            if (empties.Add(prod.LhsNonTerminal))
                            {
                                change = true;
                            }
                        }
                    }

                    if (!change)
                    {
                        break;
                    }
                }

                // check the placement of error token in every error production
                foreach (Production <SYMBOL_ENUM, TREE_NODE> prod in productionsList)
                {
                    int error_count = prod.RhsSymbols.Where(it => it.Equals(SyntaxErrorSymbol)).Count();
                    if (error_count == 0)
                    {
                        continue;
                    }
                    else if (error_count > 1)
                    {
                        throw new ArgumentException("Only one syntax error token per production: " + prod.PositionDescription);
                    }
                    else
                    {
                        int idx = prod.RhsSymbols.IndexOf(SyntaxErrorSymbol);
                        if (idx != prod.RhsSymbols.Count - 2)
                        {
                            throw new ArgumentException("Syntax error token has to be next to last: " + prod.PositionDescription);
                        }
//                        if (!Terminals.Contains(prod.RhsSymbols[idx + 1]))
//                          throw new ArgumentException("There has to be a terminal or alias non-terminal after syntax error token: " + prod.PositionDescription);
                        if (empties.Contains(prod.RhsSymbols[idx + 1]))
                        {
                            throw new ArgumentException("There has to be a terminal or non-empty non-terminal after syntax error token: " + prod.PositionDescription);
                        }
                    }
                }
            }

            // checks if one non-terminal has more than 1 error recovery production
            // this code has 2 weak points known to me:
            // * obvious -- if one production looks like a prefix of the other in regard of error symbol
            //   a := C B Error D
            //   a := C   Error D
            // it should be checked (for now it is not, checking it is not that obvious)
            // * complex -- if one production contains non-terminal which also has error symbol in it
            //  a := b Error C
            //  b := d Error X
            // it is not detected, but I don't even have clear mind to think if this is useful/wrong/or something else
            {
                // getting all productions with syntax error symbol
                var recovery_prods = new List <IEnumerable <SYMBOL_ENUM> >();
                foreach (Production <SYMBOL_ENUM, TREE_NODE> prod in productionsList.Where(it => it.RhsSymbols.Contains(SyntaxErrorSymbol)))
                {
                    // taking LHS + RHS up to error symbol
                    recovery_prods.Add(prod.LhsNonTerminal.Concat(prod.RhsSymbols.TakeWhile(it => !it.Equals(SyntaxErrorSymbol))).ToArray());
                }

                // the grouping is computed only when the check is enabled
                if (!ExperimentsSettings.NonRecursiveProductionsElimination)
                {
                    // grouping by identical "LHS + prefix" and keeping the groups with more than 1 production
                    string[] doubled_recovery = recovery_prods.GroupBy(it => it, new SequenceEquality <SYMBOL_ENUM>())
                                                .Where(it => it.Count() > 1)
                                                .Select(it => SymbolsRep.Get(it.Key.First()))
                                                .ToArray();

                    if (doubled_recovery.Length > 0)
                    {
                        throw new ArgumentException("Error -- multiple productions with recovery point for: " + doubled_recovery.Select(s => "\"" + s + "\"").Join(", "));
                    }
                }
            }
        }
        /// <summary>
        /// Computes the parse action for every possible input chunk of every DFA node,
        /// stores the actions in a new action table and reports recovery item conflicts
        /// and unused precedence entries to <paramref name="report"/>.
        /// </summary>
        /// <param name="productions">Grammar productions; also the source of the symbols representation.</param>
        /// <param name="firstSets">Sets used to find the leading symbols of each recovery marker symbol.</param>
        /// <param name="coverSets">Stored in a field for use by other members.</param>
        /// <param name="horizonSets">Stored in a field for use by other members.</param>
        /// <param name="lookaheadWidth">Passed to the action table constructor.</param>
        /// <param name="dfa">Automaton whose nodes are processed.</param>
        /// <param name="precedenceTable">Precedence rules; when null, a new table created from the symbols representation is used instead.</param>
        /// <param name="report">Collects errors and warnings; receives the table on success.</param>
        /// <returns>The filled action table, or null when the report contains grammar errors.</returns>
        public ActionTable <SYMBOL_ENUM, TREE_NODE> FillActionTable(Productions <SYMBOL_ENUM, TREE_NODE> productions,
                                                                    FirstSets <SYMBOL_ENUM> firstSets,
                                                                    CoverSets <SYMBOL_ENUM> coverSets,
                                                                    HorizonSets <SYMBOL_ENUM> horizonSets,
                                                                    int lookaheadWidth,
                                                                    Dfa <SYMBOL_ENUM, TREE_NODE> dfa,
                                                                    PrecedenceTable <SYMBOL_ENUM> precedenceTable,
                                                                    GrammarReport <SYMBOL_ENUM, TREE_NODE> report)
        {
            this.coverSets       = coverSets;
            this.horizonSets     = horizonSets;
            this.report          = report;
            this.precedenceTable = precedenceTable ?? new PrecedenceTable <SYMBOL_ENUM>(productions.SymbolsRep);
            this.symbolsRep      = productions.SymbolsRep;
            actionTable          = new ActionTable <SYMBOL_ENUM, TREE_NODE>(dfa, productions,
                                                                            lookaheadWidth);

            foreach (Node <SYMBOL_ENUM, TREE_NODE> node in dfa.Nodes)
            {
                foreach (SymbolChunk <SYMBOL_ENUM> chunk in node.State.PossibleInputs)
                {
                    ParseAction <SYMBOL_ENUM, TREE_NODE> action_data = computeAction(node, chunk);

                    // once any error is reported the table is not going to be returned,
                    // so the actions are computed only to collect the remaining errors
                    if (!report.HasGrammarErrors)
                    {
                        actionTable.Add(node.State.Index, chunk, new[] { action_data });
                    }
                }

                // checking recovery conflicts

                IEnumerable <SingleState <SYMBOL_ENUM, TREE_NODE> > recovery_items = node.State.ParsingActiveItems
                                                                                     .Where(it => it.IsAtRecoveryPoint);

                // grouping the recovery items by the leading symbol of each first-set chunk of their recovery marker
                var recovery_stats = DynamicDictionary.CreateWithDefault <SYMBOL_ENUM, List <SingleState <SYMBOL_ENUM, TREE_NODE> > >();
                foreach (SingleState <SYMBOL_ENUM, TREE_NODE> rec_state in recovery_items)
                {
                    foreach (SymbolChunk <SYMBOL_ENUM> first in firstSets[rec_state.RecoveryMarkerSymbol].Chunks)
                    {
                        recovery_stats[first.Symbols.First()].Add(rec_state);
                    }
                }

                // more than one item registered under the same leading symbol is a conflict
                foreach (var pair in recovery_stats.Where(it => it.Value.Count > 1))
                {
                    report.AddError(pair.Value.Select(it => it.IndexStr), "Recovery item conflict on \"" + symbolsRep.Get(pair.Key) + "\".");
                }
            }

            // the field has to be used here, not the parameter -- the parameter can be null,
            // in which case the fallback table created above takes its place
            report.AddWarnings(this.precedenceTable.GetUnusedEntries(symbolsRep));

            if (report.HasGrammarErrors)
            {
                return(null);
            }
            else
            {
                report.ActionTable = actionTable;
                return(actionTable);
            }
        }
 /// <summary>
 /// Renders the content of <c>sets</c> as text: a one-line legend, a blank line,
 /// and then one "symbol = value" line per entry.
 /// </summary>
 /// <param name="symbolsRep">Translates symbols into their display names.</param>
 /// <returns>The legend followed by the entry lines, joined with <see cref="Environment.NewLine"/>.</returns>
 public string ToString(StringRep <SYMBOL_ENUM> symbolsRep)
 {
     string legend = "Symbol = follow symbol <- production source ~ leaked via production"
                     + Environment.NewLine + Environment.NewLine;

     // entry values are printed in non-verbose form
     var lines = sets.Select(entry => symbolsRep.Get(entry.Key) + " = " + entry.Value.ToString(symbolsRep, verboseMode: false));

     return(legend + lines.Join(Environment.NewLine));
 }