}//method

#region some explanations
// Computes non-canonical lookaheads and jump lookaheads - those that cause a jump
// to a non-canonical state.
// We do it in a top-down way, starting from the most reduced lookaheads - they are not conflicting.
// (If there were conflicting reduced lookaheads in a state initially, the grammar transformation algorithm
// should have already wrapped them into non-conflicting "tail" non-terminals.)
// We want to eliminate reduced lookaheads as much as possible, and replace them with expanded "child"
// terms, to have only those non-canonical lookaheads that are absolutely necessary.
// So for each reduced lookahead we check if we can replace it with its expanded, "child" terms
// (from the DirectFirsts set). We do it only if the lookahead's child terms are all non-conflicting as
// lookaheads in the state. If, however, at least one child is conflicting, the reduced parent should stay.
// What if we have some children conflicting and some not? We leave the parent reduced lookahead in the state,
// to cover (hide) the conflicting children, but we also add the non-conflicting children as well, to allow
// the parser automaton to use them (in canonical state) as soon as they are recognized, without the need
// to reduce the parent and switch back to canonical state.
#endregion
/// <summary>
/// Rebuilds the <c>JumpLookaheads</c> and <c>NonCanonicalLookaheads</c> sets of the state's builder data,
/// starting from the non-terminal reduced lookaheads of the state's reduce items and walking down through
/// their <c>DirectFirsts</c>. Clears the state's <c>Conflicts</c> collection when done.
/// </summary>
/// <param name="state">The parser state whose builder data is updated in place.</param>
private void ComputeStateNonCanonicalLookaheads(ParserState state) {
  var stateData = state.BuilderData; //rename for shorter code
  var jumps = stateData.JumpLookaheads; // conflicting lookaheads, that must result in jump to non-canonical state
  var valids = stateData.NonCanonicalLookaheads; // valid non-canonical lookaheads, non-terminals only
  jumps.Clear();
  valids.Clear();
  var alreadyChecked = new BnfTermSet();
  var toCheck = new BnfTermSet(); //terms to check for expansion

  //1. Precompute initial set to check
  foreach (var reduceItem in stateData.ReduceItems) {
    toCheck.UnionWith(reduceItem.ReducedLookaheads);
  }
  toCheck.RemoveWhere(t => t is Terminal); //we are interested in non-terminals only

  //2. Try to expand all initial (reduced) lookaheads, and replace original lookaheads with expanded versions
  while (toCheck.Count > 0) { // do until no terms to check left
    // Direct cast instead of "as": an "as" cast would turn an unexpected element into null, which would then
    // not be removed from toCheck and would fail later with an unrelated NullReferenceException.
    // The cast fails right here if the set ever holds something other than a NonTerminal.
    var lkhInCheck = (NonTerminal)toCheck.First();
    toCheck.Remove(lkhInCheck);
    //to prevent repeated checking of mutually recursive terms
    if (alreadyChecked.Contains(lkhInCheck)) {
      continue;
    }
    alreadyChecked.Add(lkhInCheck);

    //Now check children for conflicts; go through all direct firsts of lkhInCheck and check them for conflicts
    bool hasJumpChild = false;
    foreach (var lkhChild in lkhInCheck.DirectFirsts) {
      if (lkhChild == lkhInCheck) {
        continue; //the term lists itself among its direct firsts; skip it
      }
      if (jumps.Contains(lkhChild)) {
        //already recorded as a jump (conflicting) lookahead
        hasJumpChild = true;
        continue;
      }
      var ntChild = lkhChild as NonTerminal; //null when the child is not a non-terminal
      if (ntChild != null && valids.Contains(ntChild)) {
        continue; //already recorded as a valid non-canonical lookahead
      }
      //the child has not been tested yet; check if it is a conflict in current state
      var occurCount = GetLookaheadOccurenceCount(state, lkhChild);
      if (occurCount > 1) {
        //possible conflict, check precedence
        if (lkhChild.IsSet(TermOptions.UsePrecedence)) {
          //the term uses precedence, so it is not recorded as a jump lookahead;
          // only non-terminals are tracked in valids - nothing is recorded for other terms
          if (ntChild != null) {
            valids.Add(ntChild);
            if (!alreadyChecked.Contains(lkhChild)) {
              toCheck.Add(ntChild);
            }
          }
        } else {
          //conflict!
          hasJumpChild = true;
          jumps.Add(lkhChild);
          //if it is non-terminal, add its Firsts to conflict as well
          if (ntChild != null) {
            jumps.UnionWith(ntChild.Firsts);
          }
        }
      } else {
        //occurCount <= 1, no conflict: if it is a non-terminal that was not checked yet,
        // add it to toCheck for further checking
        if (ntChild != null && !alreadyChecked.Contains(ntChild)) {
          toCheck.Add(ntChild);
        }
      }
    }//foreach lkhChild

    //Ok, we finished checking all direct children; if at least one of them has conflict,
    // then lkhInCheck (parent) must stay as a lookahead - we cannot fully expand it replacing by all children
    if (hasJumpChild) {
      valids.Add(lkhInCheck);
    }
  }//while toCheck.Count > 0

  //remove conflicts
  stateData.Conflicts.Clear();
}//method
}//method

#region some explanations
// Computes non-canonical lookaheads and jump lookaheads - those that cause a jump
// to a non-canonical state.
// We do it in a top-down way, starting from the most reduced lookaheads - they are not conflicting.
// (If there were conflicting reduced lookaheads in a state initially, the grammar transformation algorithm
// should have already wrapped them into non-conflicting "tail" non-terminals.)
// We want to eliminate reduced lookaheads as much as possible, and replace them with expanded "child"
// terms, to have only those non-canonical lookaheads that are absolutely necessary.
// So for each reduced lookahead we check if we can replace it with its expanded, "child" terms
// (from the DirectFirsts set). We do it only if the lookahead's child terms are all non-conflicting as
// lookaheads in the state. If, however, at least one child is conflicting, the reduced parent should stay.
// What if we have some children conflicting and some not? We leave the parent reduced lookahead in the state,
// to cover (hide) the conflicting children, but we also add the non-conflicting children as well, to allow
// the parser automaton to use them (in canonical state) as soon as they are recognized, without the need
// to reduce the parent and switch back to canonical state.
#endregion
/// <summary>
/// Rebuilds the <c>JumpLookaheads</c> and <c>NonCanonicalLookaheads</c> sets of the state's builder data,
/// starting from the non-terminal reduced lookaheads of the state's reduce items and walking down through
/// their <c>DirectFirsts</c>. Clears the state's <c>Conflicts</c> collection when done.
/// </summary>
/// <param name="state">The parser state whose builder data is updated in place.</param>
private void ComputeStateNonCanonicalLookaheads(ParserState state)
{
    var stateData = state.BuilderData; //rename for shorter code
    var jumps = stateData.JumpLookaheads; // conflicting lookaheads, that must result in jump to non-canonical state
    var valids = stateData.NonCanonicalLookaheads; // valid non-canonical lookaheads, non-terminals only
    jumps.Clear();
    valids.Clear();
    var alreadyChecked = new BnfTermSet();
    var toCheck = new BnfTermSet(); //terms to check for expansion

    //1. Precompute initial set to check
    foreach (var reduceItem in stateData.ReduceItems)
    {
        toCheck.UnionWith(reduceItem.ReducedLookaheads);
    }
    toCheck.RemoveWhere(t => t is Terminal); //we are interested in non-terminals only

    //2. Try to expand all initial (reduced) lookaheads, and replace original lookaheads with expanded versions
    while (toCheck.Count > 0) // do until no terms to check left
    {
        // Direct cast instead of "as": an "as" cast would turn an unexpected element into null, which would
        // then not be removed from toCheck and would fail later with an unrelated NullReferenceException.
        // The cast fails right here if the set ever holds something other than a NonTerminal.
        var lkhInCheck = (NonTerminal)toCheck.First();
        toCheck.Remove(lkhInCheck);
        //to prevent repeated checking of mutually recursive terms
        if (alreadyChecked.Contains(lkhInCheck))
        {
            continue;
        }
        alreadyChecked.Add(lkhInCheck);

        //Now check children for conflicts; go through all direct firsts of lkhInCheck and check them for conflicts
        bool hasJumpChild = false;
        foreach (var lkhChild in lkhInCheck.DirectFirsts)
        {
            if (lkhChild == lkhInCheck)
            {
                continue; //the term lists itself among its direct firsts; skip it
            }
            if (jumps.Contains(lkhChild))
            {
                //already recorded as a jump (conflicting) lookahead
                hasJumpChild = true;
                continue;
            }
            var ntChild = lkhChild as NonTerminal; //null when the child is not a non-terminal
            if (ntChild != null && valids.Contains(ntChild))
            {
                continue; //already recorded as a valid non-canonical lookahead
            }
            //the child has not been tested yet; check if it is a conflict in current state
            var occurCount = GetLookaheadOccurenceCount(state, lkhChild);
            if (occurCount > 1)
            {
                //possible conflict, check precedence
                if (lkhChild.IsSet(TermOptions.UsePrecedence))
                {
                    //the term uses precedence, so it is not recorded as a jump lookahead;
                    // only non-terminals are tracked in valids - nothing is recorded for other terms
                    if (ntChild != null)
                    {
                        valids.Add(ntChild);
                        if (!alreadyChecked.Contains(lkhChild))
                        {
                            toCheck.Add(ntChild);
                        }
                    }
                }
                else
                {
                    //conflict!
                    hasJumpChild = true;
                    jumps.Add(lkhChild);
                    //if it is non-terminal, add its Firsts to conflict as well
                    if (ntChild != null)
                    {
                        jumps.UnionWith(ntChild.Firsts);
                    }
                }
            }
            else
            {
                //occurCount <= 1, no conflict: if it is a non-terminal that was not checked yet,
                // add it to toCheck for further checking
                if (ntChild != null && !alreadyChecked.Contains(ntChild))
                {
                    toCheck.Add(ntChild);
                }
            }
        }//foreach lkhChild

        //Ok, we finished checking all direct children; if at least one of them has conflict,
        // then lkhInCheck (parent) must stay as a lookahead - we cannot fully expand it replacing by all children
        if (hasJumpChild)
        {
            valids.Add(lkhInCheck);
        }
    }//while toCheck.Count > 0

    //remove conflicts
    stateData.Conflicts.Clear();
}//method