findShiftReducePattern(IEnumerable <SYMBOL_ENUM> shiftLhs, SYMBOL_ENUM reduceLhs,
                               IEnumerable <SYMBOL_ENUM> readSymbols, SymbolChunk <SYMBOL_ENUM> input,
                               Action <string> errorReporter)
            HashSet <SYMBOL_ENUM> shift_set = new HashSet <SYMBOL_ENUM>(shiftLhs);

            List <SymbolPrecedence <SYMBOL_ENUM> > result;

            if (patterns.TryGetValue(input, out result))
                IEnumerable <SymbolPrecedence <SYMBOL_ENUM> > rs_pattern = result.Where(prec =>
                                                                                        prec.Mode == SymbolPrecedence.ModeEnum.ShiftReduceConflict &&
                                                                                        prec.ShiftProductions.IsSupersetOf(shift_set) &&
                                                                                        prec.ReduceProductions.Contains(reduceLhs) &&
                                                                                        (!prec.StackOperators.Any() || readSymbols.Any(sym => prec.StackOperators.Contains(sym))))

                // [@SELECT]
                // todo: there is some room for improvement, if the associativity is the same, and the symbols are the same
                // we could return the last precedence (last in sense of read symbols) or one with highest priority
                if (rs_pattern.Count() > 1)
                    errorReporter("Precedence rule overlapping over stack symbols: " + rs_pattern.Select(it => it.StackOperators.Intersect(readSymbols)).Flatten().Distinct()
                                  .Select(it => symbolsRep.Get(it)).Join(","));

 // Produces the coverage report: a fixed three-line header, a blank line, and then
 // one "symbol = covered symbols" line for every key of the covers map.
 public string ToString(StringRep <SYMBOL_ENUM> symbolsRep)
 {
     string new_line = Environment.NewLine;

     // header block, ending with an empty line that separates it from the entries
     string header = "COVERAGE INFO:" + new_line
                     + "--------------" + new_line
                     + "LHS symbol = symbols that are covered"
                     + new_line + new_line;

     // one entry per covering symbol, rendered in non-verbose mode
     var entries = covers.Keys.Select(lhs =>
                                      symbolsRep.Get(lhs) + " = " + coverAsChunkSet(lhs).ToString(symbolsRep, verboseMode: false));

     return header + entries.Join(new_line);
 }
            // Builds the textual form of the state transition: the name of StateIn and, when the state
            // changes or there are any state actions, every state action followed by the name of StateOut,
            // all separated with " -> ".
            // NOTE(review): the braces and the final return of `result` are not visible in this excerpt -- confirm against the full source.
            public string StateTransStr(StringRep <STATE_ENUM> statesRep)
                string result = statesRep.Get(StateIn);

                // the arrow part is skipped only when the state stays the same AND no actions were recorded
                if (!StateIn.Equals(StateOut) || stateActions.Any())
                    result += " -> " + String.Join("", stateActions.Select(it => it + " -> ")) + statesRep.Get(StateOut);
            // Builds a multi-line description of this entry: the state transition, the matched text
            // (or EOF) with its position, the rule that matched, and every token with its assigned value.
            // NOTE(review): this excerpt has lost its braces and apparently some `else` keywords and the
            // final return -- confirm the exact branching against the full source.
            public string ToString(StringRep <SYMBOL_ENUM> symbolsRep, StringRep <STATE_ENUM> statesRep)
                string result = "states: " + StateTransStr(statesRep);

                result += Environment.NewLine;
                // a null Text is reported as EOF
                if (Text == null)
                    result += "EOF " + Position.XYString() + Environment.NewLine;
                    // NOTE(review): presumably the `else` branch (Text is dereferenced here) -- confirm
                    result += "text " + Position.XYString() + ": " + Text.PrintableString() + Environment.NewLine;

                // a missing rule is reported as unrecognized text; a rule with IsEofRule set prints no rule line
                if (Rule == null)
                    result += "UNRECOGNIZED TEXT" + Environment.NewLine;
                else if (!Rule.IsEofRule)
                    result += "rule [" + Rule.PatternId + "]: " + Rule.ToString(statesRep) + Environment.NewLine;

                string indent = "";

                // with more than one token the tokens are wrapped in a "multiple tokens { ... }" section and indented
                if (tokens.Count > 1)
                    result += "multiple tokens {" + Environment.NewLine;
                    indent  = "  ";

                foreach (TokenMatch <SYMBOL_ENUM> token in Tokens)
                    result += indent + "token [" + token.ID + "]: ";
                    if (!token.HasToken)
                        result += "*none*";
                        // NOTE(review): presumably the `else` branch -- confirm
                        result += symbolsRep.Get(token.Token) + Environment.NewLine;
                        // the value is appended as-is when there is no rule, otherwise in its printable string form
                        result += indent + "value assigned: " + (token.Value == null ? "null" : (Rule == null ? token.Value : token.Value.ToString().PrintableString()));
                    result += Environment.NewLine;

                // closing the section opened above
                if (tokens.Count > 1)
                    result += "}" + Environment.NewLine;

Exemple #5
        // Appends the textual form of this node's state to sbStates; the edge text built below
        // has the form "<state index> -- <symbol> --> <target state index>".
        // NOTE(review): this excerpt is truncated -- the receiver of `.Select(...)`, the use of sbEdges
        // and the braces are not visible, so the role of the null check cannot be confirmed from here.
        internal void BuildString(StringRep <SYMBOL_ENUM> symbolsRep,
                                  StringBuilder sbStates,
                                  StringBuilder sbEdges,
                                  IEnumerable <string> nfaStateIndices)
            sbStates.Append(State.ToString(nfaStateIndices, symbolsRep));

            if (nfaStateIndices == null)
                               .Select(edge => State.Index + " -- " + symbolsRep.Get(edge.Key) + " --> " + edge.Value.State.Index)
Exemple #6
        // Renders this item as a single line:
        //   <index>)  <lhs> := <seen rhs symbols> . <remaining rhs symbols> (n: ...) (a: ...) <-- <sources>
        // The lookahead sections and the source section are emitted only when non-empty.
        // Sources list the closure parents (prefixed "c:") followed by the shift parents (prefixed "s:").
        public string ToString(StringRep <SYMBOL_ENUM> symbolsRep)
        {
            // lowering the case so we can search a string more effectively in DFA text file;
            // invariant casing is used so the generated text does not depend on the current culture
            // (culture-sensitive lowering would, for example, turn "I" into dotless "ı" under tr-TR)
            string next_lookaheads  = NextLookaheads.ToString(symbolsRep, verboseMode: false).ToLowerInvariant();
            string after_lookaheads = AfterLookaheads.ToString(symbolsRep, verboseMode: false).ToLowerInvariant();

            // adding an empty sequence is a no-op, so no emptiness checks are needed here
            var source = new List <string>();
            source.AddRange(closureParents.Select(it => "c:" + it.IndexStr));
            source.AddRange(shiftParents.Select(it => "s:" + it.IndexStr));

            string index_and_lhs = IndexStr + ")  " + symbolsRep.Get(LhsSymbol) + " := ";

            // the dot marks how much of the right hand side has been seen so far
            string seen_rhs      = String.Join(" ", Production.RhsSymbols.Take(RhsSeenCount).Select(it => symbolsRep.Get(it)));
            string remaining_rhs = String.Join(" ", Production.RhsSymbols.Skip(RhsSeenCount).Select(it => symbolsRep.Get(it)));

            return(index_and_lhs
                   // trimming removes the stray space left when either side of the dot is empty
                   + (seen_rhs + " . " + remaining_rhs).Trim()
                   + (next_lookaheads.Length > 0 ? "\t (n: " + next_lookaheads + " )" : "")
                   + (after_lookaheads.Length > 0 ? "\t (a: " + after_lookaheads + " )" : "")
                   + (source.Any() ? "\t <-- " + source.Join(" ") : ""));
        }
Exemple #7
        // if we have clear action to do (shift/reduce) pass it forward
        // if not, here we try to recover from syntax error
        //
        // nodeId              -- the node whose actions are looked up in the action table
        // parseActions        -- receives the actions found in the table or the reduction built during recovery
        // startWithRecovering -- when true, the table lookup is skipped on the first pass of the loop
        //
        // NOTE(review): this excerpt has lost its braces and several statements (returns, `else` keywords,
        // the end of the ArgMin call chain); the comments below describe only what is visible -- confirm
        // the exact control flow against the full source.
        private ActionRecoveryEnum getActionOrRecover(int nodeId,
                                                      out IEnumerable <ParseAction <SYMBOL_ENUM, TREE_NODE> > parseActions,
                                                      bool startWithRecovering)
            ActionRecoveryEnum success_result = ActionRecoveryEnum.Success;

            while (true)
                if (startWithRecovering)
                    parseActions = null;
                    // NOTE(review): presumably the `else` branch -- confirm
                    // in normal run we ignore grammar conflicts, user should get conflicts just once, at validating stage
                    parseActions = actionTable.Get(nodeId, stackMaster.InputView);

                // the flag affects only the first pass of the loop
                startWithRecovering = false;

                if (parseActions != null)
                    // it could be success after naive recovery

                // trying to recover from old recovery point
                if (stackMaster.IsForked)
                    // make a lazy message
                    // (the message is passed as a delegate; it reports the node, up to lookaheadWidth input tokens
                    // and the last historyHorizon symbols from the stack)
                    if (!callUserErrorHandler(stackMaster.InputHead, () => "No action defined at node " + nodeId
                                              + " for input \"" + SymbolChunk.Create(stackMaster.InputTokens.Take(lookaheadWidth)).ToString(symbolsRep) + "\" with stack \""
                                              + String.Join(" ", stackMaster.Stack.TakeTail(historyHorizon)
                                                            .Select(it => symbolsRep.Get(it.Symbol))) + "\"."))

                    // an error was hit, so the streak of correct actions starts over
                    consecutiveCorrectActionsCount = 0;
                    IEnumerable <NfaCell <SYMBOL_ENUM, TREE_NODE> > recovery_items;

                    // looking for the last recoverable element and taking its recovery items
                    if (stack.FindLastWhere(it => it.IsRecoverable, it => it.RecoveryItems, out recovery_items))
                        if (options.Trace)
                            parseLog.Last.Value.Recovered = true;

                        // we would like to get minimal recovery item
                        // i.e. the one which wastes the minimum of the input in order to recover
                        NfaCell <SYMBOL_ENUM, TREE_NODE> min_recovery_item = recovery_items
                                                                             .ArgMin(rec => stackMaster.Input
                                                                             // for each recovery item compute the count of required tokens from input
                                                                                     .TakeWhile(it => !rec.MatchesRecoveryTerminal(it.Token) && !it.Token.Equals(EofSymbol)).Count())
                                                                             // not single, because we could hit EOF in several cases

                        // the recovery is expressed as a single reduction of the chosen recovery item
                        parseActions = new[] { new ParseAction <SYMBOL_ENUM, TREE_NODE>(false, ReductionAction.Create(min_recovery_item)) };

                        // skipping the input up to the recovery terminal of the chosen item (or up to EOF)
                        stackMaster.AdvanceInputWhile(it => !min_recovery_item.MatchesRecoveryTerminal(it.Token) && !it.Token.Equals(EofSymbol));

                        // we hit the wall
                        if (IsEndOfInput)

                        stackMaster.AdvanceInput(); // advance past the marker

                        // setting stack as if we were the old recovery point
                        // (sometimes we really are, because the last element on the stack can be recovery point)
                        stackMaster.RemoveLastWhile(it => !it.IsRecoverable);

                    else if (IsEndOfInput)
                        if (options.Trace)
                            parseLog.Last.Value.Recovered = true;

                        // NOTE(review): the comment below reads like a separate fallback branch whose `else` is missing -- confirm
                        // there is no recovery rule defined by the user so try to
                        // "fix" the errors step by step
                        // further success will be in fact the result of recovery
                        success_result = ActionRecoveryEnum.Recovered;
Exemple #8
 // Returns one line per production in the form "<lhs> := <rhs symbols separated by spaces>",
 // with every symbol translated to text through symbolsRep. The sequence is evaluated lazily.
 public IEnumerable <string> Report(StringRep <SYMBOL_ENUM> symbolsRep)
 {
     return productionsList.Select(production =>
     {
         var lhs_text = symbolsRep.Get(production.LhsNonTerminal);
         var rhs_text = String.Join(" ", production.RhsSymbols.Select(rhs_symbol => symbolsRep.Get(rhs_symbol)));

         return lhs_text + " := " + rhs_text;
     });
 }
Exemple #9
        // Checks the consistency of the grammar (symbols and productions). Hard errors are thrown as
        // ArgumentException, dead productions are only reported through addWarning.
        // NOTE(review): this excerpt has lost its braces and several statements (e.g. whatever follows
        // `if (!changed)` / `if (!change)` / `if (!error_symbols.Any())`, and an `else` before the index
        // check of the error token) -- confirm the exact control flow against the full source.
        private void validate(Action <string> addWarning)
            // every symbol is converted to int (via boxing) and none of the values can be negative
            if (nonTerminals.Concat(terminals).Concat(SyntaxErrorSymbol).Any(x => ((int)(object)x) < 0))
                throw new ArgumentException("All symbols have to have non-negative int representation.");

                // do NOT remove this condition -- if you need multiple start productions, then add on-fly super start production consisting of this start symbol
                // other code relies on the fact there is only single start production, like DFA worker
                IEnumerable <Production <SYMBOL_ENUM, TREE_NODE> > start_prod = productionsList.Where(it => it.LhsNonTerminal.Equals(StartSymbol));
                if (start_prod.Count() != 1)
                    throw new ArgumentException("There should be exactly 1 productions with start symbol \"" + SymbolsRep.Get(StartSymbol) + "\":" + Environment.NewLine
                                                + String.Join(Environment.NewLine, start_prod.Select(it => it.ToString())));

                // the start symbol can appear only on the left hand side
                IEnumerable <Production <SYMBOL_ENUM, TREE_NODE> > prods_with_start = productionsList.Where(it => it.RhsSymbols.Contains(StartSymbol));
                if (prods_with_start.Any())
                    throw new ArgumentException("Start symbol \"" + SymbolsRep.Get(StartSymbol) + "\" cannot be used on right hand side of productions:" + Environment.NewLine
                                                + String.Join(Environment.NewLine, prods_with_start.Select(it => it.ToString())));

            // the production of the start symbol has to be listed first
            if (!productionsList.First().LhsNonTerminal.Equals(StartSymbol))
                throw new ArgumentException(String.Format("Start symbol \"{0}\" should be in the first production.", SymbolsRep.Get(StartSymbol)));

            // EOF and syntax error symbols are reserved: EOF cannot appear in any production at all,
            // syntax error symbol cannot be the left hand side
            if (productionsList.Any(it => it.LhsNonTerminal.Equals(EofSymbol)))
                throw new ArgumentException("There cannot be production for EOF token.");

            if (productionsList.Any(prod => prod.RhsSymbols.Any(it => it.Equals(EofSymbol))))
                throw new ArgumentException("EOF token cannot be used explicitly in productions.");

            if (productionsList.Any(it => it.LhsNonTerminal.Equals(SyntaxErrorSymbol)))
                throw new ArgumentException("There cannot be production for syntax error token.");

                // everything that is derived from S, with S included
                // NOTE(review): the statement which puts the start symbol into the set is not visible here -- confirm
                var reachable_symbols = new HashSet <SYMBOL_ENUM>();

                // fixed-point iteration: keep adding right hand side symbols of already reached symbols
                // until a pass adds nothing new
                while (true)
                    bool changed = false;
                    // iterating over a copy, because the set is modified inside the loop
                    foreach (SYMBOL_ENUM symbol in reachable_symbols.ToList())
                        foreach (SYMBOL_ENUM rhs_sym in FilterByLhs(symbol).Select(it => it.RhsSymbols).Flatten())
                            if (reachable_symbols.Add(rhs_sym))
                                changed = true;

                    if (!changed)

                // non-terminals which were not reached are only reported as a warning
                IEnumerable <SYMBOL_ENUM> dead_lhs = nonTerminals.Where(it => !reachable_symbols.Contains(it));
                if (dead_lhs.Any())
                    addWarning("Detected dead productions for symbol(s): "
                               + String.Join(",", dead_lhs.Select(it => SymbolsRep.Get(it))) + " in productions:" + Environment.NewLine
                               + String.Join(Environment.NewLine, dead_lhs.Select(lhs => FilterByLhs(lhs).Select(prod => prod.ToString())).Flatten()));

                // collecting non-terminals having a production whose right hand side consists only of
                // already collected symbols; a production with no right hand side symbols qualifies at once,
                // because All() is true for an empty sequence
                var empties = new HashSet <SYMBOL_ENUM>();
                while (true)
                    bool change = false;
                    foreach (Production <SYMBOL_ENUM, TREE_NODE> prod in productionsList.Where(it => !empties.Contains(it.LhsNonTerminal)))
                        if (prod.RhsSymbols.All(it => empties.Contains(it)))
                            if (empties.Add(prod.LhsNonTerminal))
                                change = true;

                    if (!change)

                // check the placement of error token in every error production
                foreach (Production <SYMBOL_ENUM, TREE_NODE> prod in productionsList)
                    IEnumerable <SYMBOL_ENUM> error_symbols = prod.RhsSymbols.Where(it => it.Equals(SyntaxErrorSymbol));
                    if (!error_symbols.Any())
                    else if (error_symbols.Count() > 1)
                        throw new ArgumentException("Only one syntax error token per production: " + prod.PositionDescription);
                        // the error token has to be followed by exactly one symbol
                        int idx = prod.RhsSymbols.IndexOf(SyntaxErrorSymbol);
                        if (idx != prod.RhsSymbols.Count - 2)
                            throw new ArgumentException("Syntax error token has to be next to last: " + prod.PositionDescription);
//                        if (!Terminals.Contains(prod.RhsSymbols[idx + 1]))
//                          throw new ArgumentException("There has to be a terminal or alias non-terminal after syntax error token: " + prod.PositionDescription);
                        // and that symbol cannot be one of the symbols collected in `empties` above
                        if (empties.Contains(prod.RhsSymbols[idx + 1]))
                            throw new ArgumentException("There has to be a terminal or non-empty non-terminal after syntax error token: " + prod.PositionDescription);

            // checks if one non-terminal has more than 1 error recovery production
            // this code has 2 weak points known to me:
            // * obvious -- if one production looks like a prefix of other in regard of error symbol
            //   a := C B Error D
            //   a := C   Error D
            // it should be checked (for now it is not, checking it is not that obvious)
            // * complex -- if one production contains non-terminal which also has error symbol in it
            //  a := b Error C
            //  b := d Error X
            // it is not detected, but I don't even have clear mind to think if this is useful/wrong/or something else
                // getting all productions with syntax error symbol
                var recovery_prods = new List <IEnumerable <SYMBOL_ENUM> >();
                foreach (Production <SYMBOL_ENUM, TREE_NODE> prod in productionsList.Where(it => it.RhsSymbols.Contains(SyntaxErrorSymbol)))
                    // taking LHS + RHS up to error symbol
                    recovery_prods.Add(prod.LhsNonTerminal.Concat(prod.RhsSymbols.TakeWhile(it => !it.Equals(SyntaxErrorSymbol))).ToArray());

                // grouping identical sequences (LHS + RHS prefix) and keeping the groups with more than 1 member;
                // the first element of each sequence is the LHS symbol, which is used for the report
                IEnumerable <string> doubled_recovery = recovery_prods.GroupBy(it => it, new SequenceEquality <SYMBOL_ENUM>())
                                                        .Where(it => it.Count() > 1)
                                                        .Select(it => SymbolsRep.Get(it.Key.First()));

                // the duplicates are tolerated when the NonRecursiveProductionsElimination experiment is turned on
                if (!ExperimentsSettings.NonRecursiveProductionsElimination && doubled_recovery.Any())
                    throw new ArgumentException("Error -- multiple productions with recovery point for: " + doubled_recovery.Select(s => "\"" + s + "\"").Join(", "));
        // Fills the action table: for every DFA node and every possible input chunk of its state an action
        // is computed, and recovery items of each node are checked for conflicts (reported to `report`).
        // NOTE(review): this excerpt is damaged -- the braces are gone, one assignment lost its left side
        // (`= report;`), the ActionTable constructor call is cut off, the body of the innermost recovery
        // loop and the return statement are not visible. Confirm all details against the full source.
        public ActionTable <SYMBOL_ENUM, TREE_NODE> FillActionTable(Productions <SYMBOL_ENUM, TREE_NODE> productions,
                                                                    FirstSets <SYMBOL_ENUM> firstSets,
                                                                    CoverSets <SYMBOL_ENUM> coverSets,
                                                                    HorizonSets <SYMBOL_ENUM> horizonSets,
                                                                    int lookaheadWidth,
                                                                    Dfa <SYMBOL_ENUM, TREE_NODE> dfa,
                                                                    PrecedenceTable <SYMBOL_ENUM> precedenceTable,
                                                                    GrammarReport <SYMBOL_ENUM, TREE_NODE> report)
            this.coverSets       = coverSets;
            this.horizonSets     = horizonSets;
            = report;
            // when no precedence table is given, a fresh one is created from the symbols of the productions
            this.precedenceTable = precedenceTable ?? new PrecedenceTable <SYMBOL_ENUM>(productions.SymbolsRep);
            this.symbolsRep      = productions.SymbolsRep;
            actionTable          = new ActionTable <SYMBOL_ENUM, TREE_NODE>(dfa, productions,

            foreach (Node <SYMBOL_ENUM, TREE_NODE> node in dfa.Nodes)
                foreach (SymbolChunk <SYMBOL_ENUM> chunk in node.State.PossibleInputs)
                    ParseAction <SYMBOL_ENUM, TREE_NODE> action_data = computeAction(node, chunk);

                    // once any grammar error is reported, no further actions are added to the table
                    if (!report.HasGrammarErrors)
                        actionTable.Add(node.State.Index, chunk, new[] { action_data });

                // checking recovery conflicts

                IEnumerable <SingleState <SYMBOL_ENUM, TREE_NODE> > recovery_items = node.State.ParsingActiveItems
                                                                                     .Where(it => it.IsAtRecoveryPoint);

                // symbol --> recovery items associated with it
                // NOTE(review): the statement which fills this dictionary is missing from the loop below -- confirm
                var recovery_stats = DynamicDictionary.CreateWithDefault <SYMBOL_ENUM, List <SingleState <SYMBOL_ENUM, TREE_NODE> > >();
                foreach (SingleState <SYMBOL_ENUM, TREE_NODE> rec_state in recovery_items)
                    foreach (SymbolChunk <SYMBOL_ENUM> first in firstSets[rec_state.RecoveryMarkerSymbol].Chunks)

                // more than one recovery item for the same symbol is a conflict
                foreach (var pair in recovery_stats.Where(it => it.Value.Count > 1))
                    report.AddError(pair.Value.Select(it => it.IndexStr), "Recovery item conflict on \"" + symbolsRep.Get(pair.Key) + "\".");


            // NOTE(review): as visible here the table is attached to the report when grammar errors exist;
            // statements may be missing around this point -- confirm
            if (report.HasGrammarErrors)
                report.ActionTable = actionTable;
 // Produces the report of the sets: a legend line, a blank line, and then one
 // "symbol = set content" line per entry, with the content rendered in non-verbose mode.
 public string ToString(StringRep <SYMBOL_ENUM> symbolsRep)
 {
     string new_line = Environment.NewLine;

     // legend followed by an empty line
     string legend = "Symbol = follow symbol <- production source ~ leaked via production"
                     + new_line + new_line;

     var entries = sets.Select(entry =>
                               symbolsRep.Get(entry.Key) + " = " + entry.Value.ToString(symbolsRep, verboseMode: false));

     return legend + entries.Join(new_line);
 }