/** Splice the NFA built for a rule's block between that rule's start and
 *  stop states.  A block that came back as a set is first wrapped into a
 *  single-alternative block.  The splicing itself only happens when the
 *  current rule is a parser rule or the grammar is a lexer grammar; when
 *  the block entry has more than one alternative it is also registered
 *  with the grammar as a decision.
 */
private void HandleRule(GrammarAST start, StateCluster g, GrammarAST blockStart, GrammarAST id)
        {
            // a set is not a BLOCK: turn it into a block with a single ALT
            if (blockStart.SetValue != null)
            {
                g = factory.BuildAlternativeBlockFromSet(g);
            }

            bool isParserRule = Rule.GetRuleType(currentRuleName) == RuleType.Parser;
            if (!isParserRule && grammar.type != GrammarType.Lexer)
            {
                return;
            }

            Rule rule = grammar.GetLocallyDefinedRule(currentRuleName);

            // epsilon edge from the rule's start state into the block
            NFAState ruleStart = rule.StartState;
            ruleStart.associatedASTNode = id;
            ruleStart.AddTransition(new Transition(Label.EPSILON, g.Left));

            // only blocks with at least two alternatives are tracked as decisions
            if (grammar.GetNumberOfAltsForDecisionNFA(g.Left) > 1)
            {
                g.Left.Description = grammar.GrammarTreeToString(start, false);
                g.Left.SetDecisionASTNode(blockStart);
                int decision = grammar.AssignDecisionNumber(g.Left);
                grammar.SetDecisionNFA(decision, g.Left);
                grammar.SetDecisionBlockAST(decision, blockStart);
            }

            // epsilon edge from the end of the block to the rule's stop state
            NFAState ruleStop = rule.StopState;
            g.Right.AddTransition(new Transition(Label.EPSILON, ruleStop));
        }
Example #2
0
        /// <summary>
        /// Builds a NFA that matches the single unicode code point written in an AST node
        /// </summary>
        /// <param name="node">An AST node whose value is a two-character prefix followed by the code point in hexadecimal</param>
        /// <returns>The equivalent NFA, or the result of BuildEpsilonNFA when the code point is rejected</returns>
        private NFA BuildNFAFromCodepoint(ASTNode node)
        {
            // skip the two-character prefix and read what remains as a hexadecimal number
            string hexDigits = node.Value.Substring(2);
            int codePoint = Convert.ToInt32(hexDigits, 16);

            // surrogate encoding points and values outside [0, 0x10FFFF] are rejected
            bool isSurrogate  = codePoint >= 0xD800 && codePoint <= 0xDFFF;
            bool isOutOfRange = codePoint < 0 || codePoint >= 0x110000;
            if (isOutOfRange || isSurrogate)
            {
                OnError(node.Position, "The value U+{0} is not a supported unicode code point", codePoint.ToString("X"));
                return BuildEpsilonNFA();
            }

            UnicodeCodePoint unicode = new UnicodeCodePoint(codePoint);
            NFA result = NFA.NewMinimal();
            char[] units = unicode.GetUTF16();

            if (units.Length != 1)
            {
                // two UTF-16 code units: chain them through an extra state
                NFAState middle = result.AddNewState();
                result.StateEntry.AddTransition(new CharSpan(units[0], units[0]), middle);
                middle.AddTransition(new CharSpan(units[1], units[1]), result.StateExit);
            }
            else
            {
                // a single UTF-16 code unit: direct entry-to-exit transition
                result.StateEntry.AddTransition(new CharSpan(units[0], units[0]), result.StateExit);
            }
            return result;
        }
Example #3
0
        /** Build a two-state cluster whose single edge carries an action
         *  label, i.e. what amounts to an epsilon transition with an action.
         *  The action is stored in the NFA even though analysis ignores it.
         *  This slows things down a bit, but predicates must be ignored
         *  once an action has been seen (5-5-2008).
         */
        public virtual StateCluster BuildAction(GrammarAST action)
        {
            NFAState entry = NewState();
            NFAState exit  = NewState();

            entry.AddTransition(new Transition(new ActionLabel(action), exit));
            return new StateCluster(entry, exit);
        }
Example #4
0
        /** Build a two-state cluster o-(a..b)->o whose single edge is
         *  labelled with the interval set IntervalSet.Of(a, b).
         */
        public virtual StateCluster BuildRange(int a, int b)
        {
            NFAState source = NewState();
            NFAState target = NewState();

            source.AddTransition(new Transition(new Label(IntervalSet.Of(a, b)), target));
            return new StateCluster(source, target);
        }
 /** Append an epsilon edge to `following` at the tail of the chain of
  *  states reached from the stop state of rule `ruleName` by repeatedly
  *  taking transition 1.  If the tail already has a transition 0, a fresh
  *  state is linked in first so the new edge goes on that state instead.
  */
 protected virtual void AddFollowTransition( string ruleName, NFAState following )
 {
     Rule rule = grammar.GetRule( ruleName );

     // walk to the last link in the FOLLOW chain emanating from the rule
     NFAState tail = rule.StopState;
     for ( Transition next = tail.GetTransition( 1 ); next != null; next = tail.GetTransition( 1 ) )
     {
         tail = (NFAState)next.Target;
     }

     if ( tail.GetTransition( 0 ) != null )
     {
         // the tail already points to a following node:
         // extend the chain with a new node to keep edges to a max of 2
         NFAState extension = factory.NewState();
         tail.AddTransition( new Transition( Label.EPSILON, extension ) );
         tail = extension;
     }

     tail.AddTransition( new Transition( Label.EPSILON, following ) );
 }
Example #6
0
        /** Build the cluster for a reference to rule r:
         *
         *  o-e->(r)  o
         *
         *  (r) is the start state of rule r.  The trailing o is not linked
         *  to directly from the rule ref state; it is the state handed to
         *  the RuleClosureTransition placed on the left state.
         *
         *  If rule r is just a list of tokens, its block will be just a set
         *  on an edge o->o->o-set->o->o->o.  It could be inlined rather than
         *  referenced, but this is not done yet as it is unclear whether it
         *  would help much in the NFA->DFA construction.
         *
         *  TODO add to codegen: collapse alt blks that are sets into single matchSet
         */
        public virtual StateCluster BuildRuleRef(Rule refDef, NFAState ruleStart)
        {
            NFAState refState      = NewState();
            NFAState followingState = NewState();

            refState.AddTransition(new RuleClosureTransition(refDef, ruleStart, followingState));
            return new StateCluster(refState, followingState);
        }
Example #7
0
        /// <summary>
        /// Builds a NFA that matches any single character: either one non-surrogate UTF-16 code unit, or a high surrogate followed by a low surrogate
        /// </summary>
        /// <returns>The equivalent NFA</returns>
        private static NFA BuildNFAFromAny()
        {
            NFA result = NFA.NewMinimal();

            // single code units, on either side of the surrogate encoding points
            CharSpan belowSurrogates = new CharSpan((char)0x0000, (char)0xD7FF);
            result.StateEntry.AddTransition(belowSurrogates, result.StateExit);
            CharSpan aboveSurrogates = new CharSpan((char)0xE000, (char)0xFFFF);
            result.StateEntry.AddTransition(aboveSurrogates, result.StateExit);

            // two code units: high surrogate, extra state, low surrogate
            NFAState afterHighSurrogate = result.AddNewState();
            CharSpan highSurrogates = new CharSpan((char)0xD800, (char)0xDBFF);
            result.StateEntry.AddTransition(highSurrogates, afterHighSurrogate);
            CharSpan lowSurrogates = new CharSpan((char)0xDC00, (char)0xDFFF);
            afterHighSurrogate.AddTransition(lowSurrogates, result.StateExit);

            return result;
        }
Example #8
0
        /** Build a two-state atom whose edge label holds every value in the
         *  grammar's TokenTypes set (char or tokens).  Both states are
         *  tagged with the given AST node.
         */
        public virtual StateCluster BuildWildcard(GrammarAST associatedAST)
        {
            NFAState source = NewState();
            NFAState target = NewState();
            source.associatedASTNode = associatedAST;
            target.associatedASTNode = associatedAST;

            Label anything = new Label(nfa.grammar.TokenTypes);
            source.AddTransition(new Transition(anything, target));

            return new StateCluster(source, target);
        }
Example #9
0
        /** Build a two-state cluster whose single edge is labelled with the
         *  given set.  Both states are tagged with the given AST node.
         *  The original note for this method describes the target shape as
         *  o->o-set->o, with an extra state on the left to conform to what
         *  an alt block looks like.
         */
        public virtual StateCluster BuildSet(IIntSet set, GrammarAST associatedAST)
        {
            NFAState source = NewState();
            NFAState target = NewState();
            source.associatedASTNode = associatedAST;
            target.associatedASTNode = associatedAST;

            source.AddTransition(new Transition(new Label(set), target));

            return new StateCluster(source, target);
        }
Example #10
0
        /** Build a two-state cluster whose single edge carries a predicate
         *  label, i.e. what amounts to an epsilon transition with a semantic
         *  predicate action.  The pred is a pointer into the AST of the
         *  SEMPRED token.  The grammar's count of semantic predicates is
         *  incremented unless the predicate text starts with the syntactic
         *  predicate rule prefix (compared ignoring case).
         */
        public virtual StateCluster BuildSemanticPredicate(GrammarAST pred)
        {
            // syn preds are not counted
            bool isSyntacticPredicate = pred.Text.StartsWith(Grammar.SynpredRulePrefix, StringComparison.OrdinalIgnoreCase);
            if (!isSyntacticPredicate)
            {
                nfa.grammar.numberOfSemanticPredicates++;
            }

            NFAState source = NewState();
            NFAState target = NewState();

            source.AddTransition(new Transition(new PredicateLabel(pred), target));
            return new StateCluster(source, target);
        }
Example #11
0
        /** Attach to the given state a transition into a fresh state that
         *  will yield eof tokens or, in the case of a lexer grammar, an EOT
         *  token when the conversion hits the end of a rule.  For a lexer
         *  grammar the fresh state is also flagged as an EOT target state.
         */
        private void BuildEofState(NFAState endNFAState)
        {
            NFAState target = NewState();
            bool isLexer = nfa.grammar.type == GrammarType.Lexer;

            if (isLexer)
            {
                target.IsEOTTargetState = true;
            }

            int label = isLexer ? Label.EOT : Label.EOF;
            endNFAState.AddTransition(new Transition(label, target));
        }
Example #12
0
        /** Add to state a a new transition that targets state b and carries
         *  the given label.
         */
        private void TransitionBetweenStates(NFAState a, NFAState b, int label)
        {
            a.AddTransition(new Transition(label, b));
        }
Example #13
0
        /// <summary>
        /// Builds a NFA from a character class
        /// </summary>
        /// <remarks>
        /// The node value is read as one delimiter character, an optional '^' that negates the class,
        /// a sequence of single characters and 'x-y' ranges, and one closing delimiter character.
        /// For a negated class, the NFA accepts every non-surrogate UTF-16 code unit that is not covered
        /// by any listed span, plus every surrogate pair.
        /// </remarks>
        /// <param name="node">An AST node representing a NFA</param>
        /// <returns>The equivalent NFA, or the result of BuildEpsilonNFA after an error is reported for a surrogate character in the class</returns>
        private NFA BuildNFAFromClass(ASTNode node)
        {
            // extract the value, dropping the first and the last character
            string value = node.Value;

            value = value.Substring(1, value.Length - 2);
            bool positive = true;

            if (value.Length > 0 && value[0] == '^')
            {
                // a leading '^' negates the class
                value    = value.Substring(1);
                positive = false;
            }
            // build the character spans
            List <CharSpan> spans = new List <CharSpan>();

            for (int i = 0; i != value.Length;)
            {
                // read the first full unicode character
                CharValue b = GetCharValue(value, i);
                i += b.length;
                if (b.chars[0] >= 0xD800 && b.chars[0] <= 0xDFFF)
                {
                    OnError(node.Position, "Unsupported non-plane 0 Unicode character ({0}) in character class", new String(b.chars));
                    return(BuildEpsilonNFA());
                }
                // a '-' is a range operator only when at least one more character follows it
                if ((i <= value.Length - 2) && (value[i] == '-'))
                {
                    // this is a range, match the '-'
                    i++;
                    CharValue e = GetCharValue(value, i);
                    i += e.length;
                    if (e.chars[0] >= 0xD800 && e.chars[0] <= 0xDFFF)
                    {
                        OnError(node.Position, "Unsupported non-plane 0 Unicode character ({0}) in character class", new String(e.chars));
                        return(BuildEpsilonNFA());
                    }
                    // NOTE(review): a range written backwards (begin > end) is not detected here
                    char begin = b.chars.Length == 1 ? b.chars[0] : b.chars[1];
                    char end   = e.chars.Length == 1 ? e.chars[0] : e.chars[1];
                    if (begin < 0xD800 && end > 0xDFFF)
                    {
                        // the range straddles the surrogate encoding points: split it around them
                        spans.Add(new CharSpan(begin, (char)0xD7FF));
                        spans.Add(new CharSpan((char)0xE000, end));
                    }
                    else
                    {
                        spans.Add(new CharSpan(begin, end));
                    }
                }
                else
                {
                    // this is a normal character
                    char begin = b.chars.Length == 1 ? b.chars[0] : b.chars[1];
                    spans.Add(new CharSpan(begin, begin));
                }
            }
            // build the result
            NFA automata = NFA.NewMinimal();

            if (positive)
            {
                foreach (CharSpan span in spans)
                {
                    automata.StateEntry.AddTransition(span, automata.StateExit);
                }
            }
            else
            {
                // assumes CharSpan.Compare orders spans by ascending Begin - TODO confirm
                spans.Sort(new System.Comparison <CharSpan>(CharSpan.Compare));
                // first code unit not yet known to be excluded; kept as an int so that a span
                // ending on 0xFFFF moves it to 0x10000 instead of wrapping around to 0
                int next = 0;
                for (int i = 0; i != spans.Count; i++)
                {
                    if (spans[i].Begin > next)
                    {
                        // everything between the previous spans and this one belongs to the complement
                        AddNonSurrogateSpan(automata, next, spans[i].Begin - 1);
                    }
                    // never move backwards: an overlapping or nested span must not re-open
                    // a range that an earlier span already excluded
                    if (spans[i].End + 1 > next)
                    {
                        next = spans[i].End + 1;
                    }
                }
                if (next <= 0xFFFF)
                {
                    // what remains after the last excluded span, up to and including 0xFFFF
                    AddNonSurrogateSpan(automata, next, 0xFFFF);
                }
                // surrogate pairs
                NFAState intermediate = automata.AddNewState();
                automata.StateEntry.AddTransition(new CharSpan((char)0xD800, (char)0xDBFF), intermediate);
                intermediate.AddTransition(new CharSpan((char)0xDC00, (char)0xDFFF), automata.StateExit);
            }
            return(automata);
        }

        /// <summary>
        /// Adds to a NFA the entry-to-exit transitions for a range of UTF-16 code units, leaving out the surrogate encoding points (0xD800 to 0xDFFF)
        /// </summary>
        /// <param name="automata">The target NFA</param>
        /// <param name="first">The first code unit of the range (inclusive), at most <paramref name="last"/></param>
        /// <param name="last">The last code unit of the range (inclusive), at most 0xFFFF</param>
        private static void AddNonSurrogateSpan(NFA automata, int first, int last)
        {
            if (first < 0xD800)
            {
                // part of the range below the surrogate encoding points
                int upper = last < 0xD800 ? last : 0xD7FF;
                automata.StateEntry.AddTransition(new CharSpan((char)first, (char)upper), automata.StateExit);
            }
            if (last > 0xDFFF)
            {
                // part of the range above the surrogate encoding points
                int lower = first > 0xDFFF ? first : 0xE000;
                automata.StateEntry.AddTransition(new CharSpan((char)lower, (char)last), automata.StateExit);
            }
        }
Example #14
0
        /// <summary>
        /// Adds a unicode character span to an existing NFA automaton
        /// </summary>
        /// <remarks>
        /// Code points are matched on their UTF-16 encoding: a plane 0 code point is one transition from
        /// the entry state to the exit state, any other code point is a high surrogate transition to an
        /// added state followed by a low surrogate transition to the exit state.
        /// </remarks>
        /// <param name="automata">The target NFA</param>
        /// <param name="span">The unicode span to add</param>
        private static void AddUnicodeSpanToNFA(NFA automata, UnicodeSpan span)
        {
            // UTF-16 code units of both ends; two units are expected for an end that is not in plane 0
            char[] b = span.Begin.GetUTF16();
            char[] e = span.End.GetUTF16();

            if (span.IsPlane0)
            {
                // this span is entirely in plane 0
                automata.StateEntry.AddTransition(new CharSpan(b[0], e[0]), automata.StateExit);
            }
            else if (span.Begin.IsPlane0)
            {
                // this span has only a part in plane 0: from its beginning to 0xFFFF
                if (b[0] < 0xD800)
                {
                    // step over the surrogate encoding points
                    automata.StateEntry.AddTransition(new CharSpan(b[0], (char)0xD7FF), automata.StateExit);
                    automata.StateEntry.AddTransition(new CharSpan((char)0xE000, (char)0xFFFF), automata.StateExit);
                }
                else
                {
                    automata.StateEntry.AddTransition(new CharSpan(b[0], (char)0xFFFF), automata.StateExit);
                }
                // the remaining part runs from U+10000 (0xD800 0xDC00) to the end of the span (e[0] e[1])
                if (e[0] > 0xD800)
                {
                    // every high surrogate strictly below the last one accepts any low surrogate;
                    // bounding these by e[1] would wrongly drop code points inside the span
                    NFAState lower = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan((char)0xD800, (char)(e[0] - 1)), lower);
                    lower.AddTransition(new CharSpan((char)0xDC00, (char)0xDFFF), automata.StateExit);
                }
                // only the last high surrogate has its low surrogate bounded by the end of the span
                NFAState upper = automata.AddNewState();
                automata.StateEntry.AddTransition(new CharSpan(e[0], e[0]), upper);
                upper.AddTransition(new CharSpan((char)0xDC00, e[1]), automata.StateExit);
            }
            else
            {
                // this span has no part in plane 0
                if (b[0] == e[0])
                {
                    // same first surrogate
                    NFAState intermediate = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(b[0], b[0]), intermediate);
                    intermediate.AddTransition(new CharSpan(b[1], e[1]), automata.StateExit);
                }
                else if (e[0] == b[0] + 1)
                {
                    // the first surrogates are consecutive encodings
                    // build lower half
                    NFAState i1 = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(b[0], b[0]), i1);
                    i1.AddTransition(new CharSpan(b[1], (char)0xDFFF), automata.StateExit);
                    // build upper half
                    NFAState i2 = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(e[0], e[0]), i2);
                    i2.AddTransition(new CharSpan((char)0xDC00, e[1]), automata.StateExit);
                }
                else
                {
                    // there is at least one surrogate value between the first surrogates of begin and end
                    // build lower part
                    NFAState ia = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(b[0], b[0]), ia);
                    ia.AddTransition(new CharSpan(b[1], (char)0xDFFF), automata.StateExit);
                    // build intermediate part
                    NFAState im = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan((char)(b[0] + 1), (char)(e[0] - 1)), im);
                    im.AddTransition(new CharSpan((char)0xDC00, (char)0xDFFF), automata.StateExit);
                    // build upper part
                    NFAState iz = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(e[0], e[0]), iz);
                    iz.AddTransition(new CharSpan((char)0xDC00, e[1]), automata.StateExit);
                }
            }
        }