/// <summary>
/// Connects the NFA cluster built for a rule's block to that rule's start and stop states
/// </summary>
/// <param name="start">The AST used to produce the description of the decision state</param>
/// <param name="g">The cluster built for the rule's block</param>
/// <param name="blockStart">The AST node of the block; recorded as the decision AST node</param>
/// <param name="id">The AST node associated to the rule's start state</param>
private void HandleRule(GrammarAST start, StateCluster g, GrammarAST blockStart, GrammarAST id)
{
    // a block that came back as a set (and not as a BLOCK) is turned into a single ALT block
    if (blockStart.SetValue != null)
    {
        g = factory.BuildAlternativeBlockFromSet(g);
    }

    // only parser rules, or any rule of a lexer grammar, are hooked up below
    bool isParserRule = Rule.GetRuleType(currentRuleName) == RuleType.Parser;
    if (!isParserRule && grammar.type != GrammarType.Lexer)
    {
        return;
    }

    // attach the rule's start node to the block
    Rule localRule = grammar.GetLocallyDefinedRule(currentRuleName);
    NFAState ruleStart = localRule.StartState;
    ruleStart.associatedASTNode = id;
    ruleStart.AddTransition(new Transition(Label.EPSILON, g.Left));

    // the left state is tracked as a decision only when it has more than one alternative
    if (grammar.GetNumberOfAltsForDecisionNFA(g.Left) > 1)
    {
        g.Left.Description = grammar.GrammarTreeToString(start, false);
        g.Left.SetDecisionASTNode(blockStart);
        int decision = grammar.AssignDecisionNumber(g.Left);
        grammar.SetDecisionNFA(decision, g.Left);
        grammar.SetDecisionBlockAST(decision, blockStart);
    }

    // hook the right side of the block to the rule's end node
    NFAState ruleStop = localRule.StopState;
    g.Right.AddTransition(new Transition(Label.EPSILON, ruleStop));
}
/// <summary>
/// Builds a NFA from a unicode code point
/// </summary>
/// <param name="node">
/// An AST node whose value holds the code point: the first two characters are skipped
/// and the remainder is read as a hexadecimal number
/// </param>
/// <returns>
/// The equivalent NFA, or the epsilon NFA (after reporting an error) when the value
/// cannot be parsed or is not an encodable code point
/// </returns>
private NFA BuildNFAFromCodepoint(ASTNode node)
{
    // extract the hexadecimal digits that follow the two-character prefix
    string value = node.Value;
    value = value.Substring(2, value.Length - 2);

    // TryParse is used so that empty, malformed or over-long digit strings are reported
    // through OnError instead of escaping as an exception
    int cpValue;
    bool parsed = int.TryParse(
        value,
        System.Globalization.NumberStyles.AllowHexSpecifier,
        System.Globalization.CultureInfo.InvariantCulture,
        out cpValue);
    if (!parsed)
    {
        OnError(node.Position, "The value U+{0} is not a supported unicode code point", value);
        return(BuildEpsilonNFA());
    }

    // reject negative values (8 hex digits with the top bit set), the surrogate range
    // and anything beyond the last unicode plane
    if (cpValue < 0 || (cpValue >= 0xD800 && cpValue <= 0xDFFF) || cpValue >= 0x110000)
    {
        OnError(node.Position, "The value U+{0} is not a supported unicode code point", cpValue.ToString("X"));
        return(BuildEpsilonNFA());
    }

    UnicodeCodePoint cp = new UnicodeCodePoint(cpValue);

    // build the NFA: one transition per UTF-16 code unit
    NFA automata = NFA.NewMinimal();
    char[] data = cp.GetUTF16();
    if (data.Length == 1)
    {
        automata.StateEntry.AddTransition(new CharSpan(data[0], data[0]), automata.StateExit);
    }
    else
    {
        // two code units: chain them through an intermediate state
        NFAState intermediate = automata.AddNewState();
        automata.StateEntry.AddTransition(new CharSpan(data[0], data[0]), intermediate);
        intermediate.AddTransition(new CharSpan(data[1], data[1]), automata.StateExit);
    }
    return(automata);
}
/** Build a two-state cluster joined by a single edge labelled with the
 *  given action; in effect an epsilon transition that carries an action.
 *  The action is stored in the NFA even though analysis ignores it.
 *  This slows things down a bit, but predicates must be ignored once
 *  an action has been seen (5-5-2008).
 */
public virtual StateCluster BuildAction(GrammarAST action)
{
    NFAState entry = NewState();
    NFAState exit = NewState();
    entry.AddTransition(new Transition(new ActionLabel(action), exit));
    return new StateCluster(entry, exit);
}
/** Build a two-state cluster joined by a single edge whose label is the
 *  interval set created from a and b.
 */
public virtual StateCluster BuildRange(int a, int b)
{
    NFAState entry = NewState();
    NFAState exit = NewState();
    entry.AddTransition(new Transition(new Label(IntervalSet.Of(a, b)), exit));
    return new StateCluster(entry, exit);
}
/** Append an epsilon edge to state 'following' at the end of the chain of
 *  transition(1) links that starts at the stop state of the named rule.
 *  If the last state of that chain already has a transition(0), a fresh
 *  state is chained in first so that no state gets more than 2 edges.
 */
protected virtual void AddFollowTransition(string ruleName, NFAState following)
{
    // walk to the last link of the chain emanating from the rule's stop state
    NFAState tail = grammar.GetRule(ruleName).StopState;
    while (tail.GetTransition(1) != null)
    {
        tail = (NFAState)tail.GetTransition(1).Target;
    }

    // the tail already points to a following node: extend the chain by one state
    if (tail.GetTransition(0) != null)
    {
        NFAState extension = factory.NewState();
        tail.AddTransition(new Transition(Label.EPSILON, extension));
        tail = extension;
    }

    tail.AddTransition(new Transition(Label.EPSILON, following));
}
/** For a reference to rule r, build
 *
 *  o-e->(r)  o
 *
 *  where (r) is the start of rule r. The trailing o is not linked to
 *  directly from the rule ref state; it is reached through the
 *  RuleClosureTransition installed as transition(0).
 *
 *  If rule r is just a list of tokens, its block will be just a set on
 *  an edge o->o->o-set->o->o->o. It could be inlined instead of building
 *  the rule reference, but this is not done yet as it is unclear whether
 *  it would help much in the NFA->DFA construction.
 *
 *  TODO add to codegen: collapse alt blks that are sets into single matchSet
 */
public virtual StateCluster BuildRuleRef(Rule refDef, NFAState ruleStart)
{
    NFAState refState = NewState();
    NFAState followState = NewState();
    refState.AddTransition(new RuleClosureTransition(refDef, ruleStart, followState));
    return new StateCluster(refState, followState);
}
/// <summary>
/// Builds a NFA that matches any single character: either one UTF-16 code unit
/// outside of 0xD800-0xDFFF, or a 0xD800-0xDBFF unit followed by a 0xDC00-0xDFFF unit
/// </summary>
/// <returns>The equivalent NFA</returns>
private static NFA BuildNFAFromAny()
{
    const char PlaneStart = (char)0x0000;
    const char BeforeSurrogates = (char)0xD7FF;
    const char HighFirst = (char)0xD800;
    const char HighLast = (char)0xDBFF;
    const char LowFirst = (char)0xDC00;
    const char LowLast = (char)0xDFFF;
    const char AfterSurrogates = (char)0xE000;
    const char PlaneEnd = (char)0xFFFF;

    NFA result = NFA.NewMinimal();

    // single code units on both sides of the surrogate range
    result.StateEntry.AddTransition(new CharSpan(PlaneStart, BeforeSurrogates), result.StateExit);
    result.StateEntry.AddTransition(new CharSpan(AfterSurrogates, PlaneEnd), result.StateExit);

    // two code units chained through an extra state
    NFAState afterHigh = result.AddNewState();
    result.StateEntry.AddTransition(new CharSpan(HighFirst, HighLast), afterHigh);
    afterHigh.AddTransition(new CharSpan(LowFirst, LowLast), result.StateExit);

    return result;
}
/** Build an atom whose label holds the grammar's TokenTypes set (chars or
 *  tokens); both states of the cluster are tagged with the given AST node.
 */
public virtual StateCluster BuildWildcard(GrammarAST associatedAST)
{
    NFAState entry = NewState();
    NFAState exit = NewState();
    entry.associatedASTNode = associatedAST;
    exit.associatedASTNode = associatedAST;

    Label anyLabel = new Label(nfa.grammar.TokenTypes);
    entry.AddTransition(new Transition(anyLabel, exit));
    return new StateCluster(entry, exit);
}
/** From a set, build the single edge graph o->o-set->o. To conform to
 *  what an alt block looks like, there must be an extra state on the left.
 *  Both states of the returned cluster are tagged with the given AST node.
 */
public virtual StateCluster BuildSet(IIntSet set, GrammarAST associatedAST)
{
    NFAState entry = NewState();
    NFAState exit = NewState();
    entry.associatedASTNode = associatedAST;
    exit.associatedASTNode = associatedAST;

    Label setLabel = new Label(set);
    entry.AddTransition(new Transition(setLabel, exit));
    return new StateCluster(entry, exit);
}
/** Build a two-state cluster joined by a single edge labelled with a
 *  semantic predicate; in effect an epsilon transition with a predicate
 *  action. The pred is a pointer into the AST of the SEMPRED token.
 *  Predicates whose text starts with the syn pred rule prefix (compared
 *  ignoring case) are not added to the grammar's predicate count.
 */
public virtual StateCluster BuildSemanticPredicate(GrammarAST pred)
{
    bool isSyntacticPredicate = pred.Text.StartsWith(Grammar.SynpredRulePrefix, StringComparison.OrdinalIgnoreCase);
    if (!isSyntacticPredicate)
    {
        nfa.grammar.numberOfSemanticPredicates++;
    }

    NFAState entry = NewState();
    NFAState exit = NewState();
    entry.AddTransition(new Transition(new PredicateLabel(pred), exit));
    return new StateCluster(entry, exit);
}
/** Set up an NFAState that will yield EOF tokens or, in the case of a
 *  lexer grammar, an EOT token when the conversion hits the end of a rule.
 *  The new state is reached from endNFAState through a single edge; for a
 *  lexer grammar it is also flagged as an EOT target state.
 */
private void BuildEofState(NFAState endNFAState)
{
    NFAState target = NewState();

    bool isLexer = nfa.grammar.type == GrammarType.Lexer;
    if (isLexer)
    {
        target.IsEOTTargetState = true;
    }
    int label = isLexer ? Label.EOT : Label.EOF;

    endNFAState.AddTransition(new Transition(label, target));
}
/** Add to state a an edge carrying the given label and targeting state b. */
private void TransitionBetweenStates(NFAState a, NFAState b, int label)
{
    a.AddTransition(new Transition(label, b));
}
/// <summary>
/// Builds a NFA from a character class
/// </summary>
/// <param name="node">
/// An AST node whose value is the class text; its first and last characters are dropped
/// and a leading '^' in what remains marks the class as negated
/// </param>
/// <returns>
/// The equivalent NFA, or the epsilon NFA (after reporting an error) when the class
/// contains a character whose first UTF-16 code unit lies in 0xD800-0xDFFF
/// </returns>
private NFA BuildNFAFromClass(ASTNode node)
{
    // extract the value
    string value = node.Value;
    value = value.Substring(1, value.Length - 2);
    bool positive = true;
    if (value.Length > 0 && value[0] == '^')
    {
        value = value.Substring(1);
        positive = false;
    }

    // build the character spans
    List<CharSpan> spans = new List<CharSpan>();
    for (int i = 0; i != value.Length;)
    {
        // read the first full unicode character
        CharValue b = GetCharValue(value, i);
        i += b.length;
        if (b.chars[0] >= 0xD800 && b.chars[0] <= 0xDFFF)
        {
            OnError(node.Position, "Unsupported non-plane 0 Unicode character ({0}) in character class", new String(b.chars));
            return(BuildEpsilonNFA());
        }
        if ((i <= value.Length - 2) && (value[i] == '-'))
        {
            // this is a range, match the '-'
            i++;
            CharValue e = GetCharValue(value, i);
            i += e.length;
            if (e.chars[0] >= 0xD800 && e.chars[0] <= 0xDFFF)
            {
                OnError(node.Position, "Unsupported non-plane 0 Unicode character ({0}) in character class", new String(e.chars));
                return(BuildEpsilonNFA());
            }
            char begin = b.chars.Length == 1 ? b.chars[0] : b.chars[1];
            char end = e.chars.Length == 1 ? e.chars[0] : e.chars[1];
            if (begin < 0xD800 && end > 0xDFFF)
            {
                // the range straddles the surrogate encoding points: split it around them
                spans.Add(new CharSpan(begin, (char)0xD7FF));
                spans.Add(new CharSpan((char)0xE000, end));
            }
            else
            {
                spans.Add(new CharSpan(begin, end));
            }
        }
        else
        {
            // this is a normal character
            char begin = b.chars.Length == 1 ? b.chars[0] : b.chars[1];
            spans.Add(new CharSpan(begin, begin));
        }
    }

    // build the result
    NFA automata = NFA.NewMinimal();
    if (positive)
    {
        foreach (CharSpan span in spans)
        {
            automata.StateEntry.AddTransition(span, automata.StateExit);
        }
    }
    else
    {
        // NOTE(review): the sweep below relies on CharSpan.Compare ordering spans by their Begin - confirm
        spans.Sort(new System.Comparison<CharSpan>(CharSpan.Compare));

        // 'next' is the lowest code unit not yet covered by an excluded span.
        // It is an int so that a span ending on 0xFFFF yields 0x10000 instead of wrapping to 0.
        int next = 0;
        foreach (CharSpan span in spans)
        {
            if (span.Begin > next)
            {
                AddPlane0ComplementSpan(automata, next, span.Begin - 1);
            }
            // only ever move forward, so overlapping or contained spans cannot re-admit
            // characters that an earlier span has excluded
            if (span.End + 1 > next)
            {
                next = span.End + 1;
            }
        }
        if (next <= 0xFFFF)
        {
            AddPlane0ComplementSpan(automata, next, 0xFFFF);
        }

        // surrogate pairs
        NFAState intermediate = automata.AddNewState();
        automata.StateEntry.AddTransition(new CharSpan((char)0xD800, (char)0xDBFF), intermediate);
        intermediate.AddTransition(new CharSpan((char)0xDC00, (char)0xDFFF), automata.StateExit);
    }
    return(automata);
}

/// <summary>
/// Adds direct entry-to-exit transitions for the code units in [first, last],
/// leaving out the surrogate encoding points 0xD800-0xDFFF
/// </summary>
/// <param name="automata">The target NFA</param>
/// <param name="first">The first code unit of the range (inclusive)</param>
/// <param name="last">The last code unit of the range (inclusive, at most 0xFFFF)</param>
private static void AddPlane0ComplementSpan(NFA automata, int first, int last)
{
    // part located below the surrogate encoding points
    if (first < 0xD800)
    {
        int upper = last < 0xD7FF ? last : 0xD7FF;
        if (first <= upper)
        {
            automata.StateEntry.AddTransition(new CharSpan((char)first, (char)upper), automata.StateExit);
        }
    }
    // part located above the surrogate encoding points
    if (last > 0xDFFF)
    {
        int lower = first > 0xE000 ? first : 0xE000;
        if (lower <= last)
        {
            automata.StateEntry.AddTransition(new CharSpan((char)lower, (char)last), automata.StateExit);
        }
    }
}
/// <summary>
/// Adds a unicode character span to an existing NFA automaton
/// </summary>
/// <param name="automata">The target NFA</param>
/// <param name="span">The unicode span to add</param>
private static void AddUnicodeSpanToNFA(NFA automata, UnicodeSpan span)
{
    char[] b = span.Begin.GetUTF16();
    char[] e = span.End.GetUTF16();

    if (span.IsPlane0)
    {
        // this span is entirely in plane 0
        automata.StateEntry.AddTransition(new CharSpan(b[0], e[0]), automata.StateExit);
        return;
    }

    if (span.Begin.IsPlane0)
    {
        // this span has only a part in plane 0: from its beginning up to 0xFFFF,
        // without the surrogate encoding points
        if (b[0] < 0xD800)
        {
            automata.StateEntry.AddTransition(new CharSpan(b[0], (char)0xD7FF), automata.StateExit);
            automata.StateEntry.AddTransition(new CharSpan((char)0xE000, (char)0xFFFF), automata.StateExit);
        }
        else
        {
            automata.StateEntry.AddTransition(new CharSpan(b[0], (char)0xFFFF), automata.StateExit);
        }
        // the remainder runs from the very first surrogate pair (0xD800 0xDC00) up to the end.
        // Only the last high surrogate has its low surrogates capped by e[1];
        // all the earlier ones accept the full low surrogate range.
        AddSurrogatePairsToNFA(automata, (char)0xD800, (char)0xDC00, e[0], e[1]);
        return;
    }

    // this span has no part in plane 0
    AddSurrogatePairsToNFA(automata, b[0], b[1], e[0], e[1]);
}

/// <summary>
/// Adds to an NFA the two-step paths matching all the surrogate pairs between
/// (highBegin, lowBegin) and (highEnd, lowEnd), both included
/// </summary>
/// <param name="automata">The target NFA</param>
/// <param name="highBegin">The high surrogate of the first pair</param>
/// <param name="lowBegin">The low surrogate of the first pair</param>
/// <param name="highEnd">The high surrogate of the last pair</param>
/// <param name="lowEnd">The low surrogate of the last pair</param>
private static void AddSurrogatePairsToNFA(NFA automata, char highBegin, char lowBegin, char highEnd, char lowEnd)
{
    if (highBegin == highEnd)
    {
        // same first surrogate
        NFAState intermediate = automata.AddNewState();
        automata.StateEntry.AddTransition(new CharSpan(highBegin, highBegin), intermediate);
        intermediate.AddTransition(new CharSpan(lowBegin, lowEnd), automata.StateExit);
        return;
    }

    // lower part: the first high surrogate, from lowBegin to the last low surrogate
    NFAState lower = automata.AddNewState();
    automata.StateEntry.AddTransition(new CharSpan(highBegin, highBegin), lower);
    lower.AddTransition(new CharSpan(lowBegin, (char)0xDFFF), automata.StateExit);

    // intermediate part: the high surrogates strictly in between take any low surrogate
    if (highEnd > highBegin + 1)
    {
        NFAState middle = automata.AddNewState();
        automata.StateEntry.AddTransition(new CharSpan((char)(highBegin + 1), (char)(highEnd - 1)), middle);
        middle.AddTransition(new CharSpan((char)0xDC00, (char)0xDFFF), automata.StateExit);
    }

    // upper part: the last high surrogate, from the first low surrogate to lowEnd
    NFAState upper = automata.AddNewState();
    automata.StateEntry.AddTransition(new CharSpan(highEnd, highEnd), upper);
    upper.AddTransition(new CharSpan((char)0xDC00, lowEnd), automata.StateExit);
}