Пример #1
0
        static private int mapped_charset_size;   // reduced charset size

        /*
         * Rewrites the labelled edges of every NFA state in spec so that they
         * refer to character classes (via the ccls table) instead of raw
         * characters, then records the table and the reduced column count
         * on spec.
         */
        static internal void simplify(Spec spec)
        {
            // Builds the ccls table and mapped_charset_size used below.
            computeClasses(spec);

            for (int stateIndex = 0; stateIndex < spec.nfa_states.Count; stateIndex++)
            {
                Nfa state = (Nfa)spec.nfa_states[stateIndex];

                bool unlabelled = state.GetEdge() == Nfa.EMPTY || state.GetEdge() == Nfa.EPSILON;
                if (!unlabelled)
                {
                    if (state.GetEdge() != Nfa.CCL)
                    {
                        // Single-character edge: replace the character by its ccls entry.
                        state.SetEdge(ccls[state.GetEdge()]);
                    }
                    else
                    {
                        // Character-set edge: build a fresh set mapped through ccls.
                        CharSet mappedSet = new CharSet();
                        mappedSet.map(state.GetCharSet(), ccls);
                        state.SetCharSet(mappedSet);
                    }
                }
            }

            // Hand the mapping and the reduced charset size back to the spec.
            spec.ccls_map     = ccls;
            spec.dtrans_ncols = mapped_charset_size;
        }
Пример #2
0
        /*
         * Fills nfa_set / nfa_bit with the Next state of every NFA state in
         * dfa's NFA set whose edge accepts b: either the edge equals b, or the
         * edge is the character-set marker and the state's CharSet contains b.
         * Both fields stay null when no state matches; otherwise sort_states()
         * is invoked once after the scan.
         */
        public void move(Dfa dfa, int b)
        {
            List <Nfa> sources = dfa.GetNFASet();

            this.nfa_set = null;
            this.nfa_bit = null;

            foreach (Nfa source in sources)
            {
                bool accepts = (int)source.Edge == b
                               || (source.Edge == '￾' && source.GetCharSet().contains(b));
                if (!accepts)
                {
                    continue;
                }

                // Allocate the result containers lazily, on the first match.
                if (this.nfa_set == null)
                {
                    this.nfa_set = new List <Nfa>();
                    this.nfa_bit = new BitSet();
                }

                Nfa target = source.Next;
                this.nfa_set.Add(target);
                this.nfa_bit.Set(target.Label, true);
            }

            if (this.nfa_set == null)
            {
                return;
            }
            this.sort_states();
        }
Пример #3
0
        /*
         * Builds and returns an NfaPair with two alternatives leaving start:
         *   - start.Next: a character-set edge whose set holds code 10,
         *     leading to end;
         *   - start.Sibling: a '\r' edge to an intermediate state whose Next
         *     is null and whose Sibling is a '\n' edge leading to end.
         * Presumably this recognises a line terminator ("\n" or "\r\n") --
         * confirm against callers.
         * States are allocated in the same order as before, since Alloc.NewNfa
         * may number them sequentially.
         */
        public static NfaPair NewNLPair(Spec spec)
        {
            NfaPair pair = Alloc.NewNfaPair();

            pair.end   = Alloc.NewNfa(spec);
            pair.start = Alloc.NewNfa(spec);

            // First alternative: character-set edge containing code 10.
            Nfa lineFeed = Alloc.NewNfa(spec);
            pair.start.Next = lineFeed;
            lineFeed.Edge   = '￾';
            lineFeed.SetCharSet(new CharSet());
            lineFeed.GetCharSet().add(10);
            lineFeed.Next = pair.end;

            // Second alternative: '\r' ...
            Nfa carriageReturn = Alloc.NewNfa(spec);
            pair.start.Sibling  = carriageReturn;
            carriageReturn.Edge = '\r';

            Nfa afterReturn = Alloc.NewNfa(spec);
            carriageReturn.Next = afterReturn;
            afterReturn.Next    = null;

            // ... followed by a '\n' edge hanging off the intermediate state.
            Nfa trailingFeed = Alloc.NewNfa(spec);
            afterReturn.Sibling = trailingFeed;
            trailingFeed.Edge   = '\n';
            trailingFeed.Next   = pair.end;

            return pair;
        }
Пример #4
0
        /*
         * Parses the whole rule section: each rule returned by MakeNfa.rule()
         * is attached as the Next of a state in a Sibling chain that starts at
         * the returned head state. A final chain entry carries a pseudo-rule
         * whose character set holds spec.BOL and spec.EOF and whose target
         * gets an Accept built from null; that pseudo-rule is appended to one
         * state_rules list per entry of spec.States.
         */
        private static Nfa machine()
        {
            Nfa    head       = Alloc.NewNfa(MakeNfa.spec);
            Nfa    tail       = head;
            BitSet ruleStates = MakeNfa.gen.GetStates();

            MakeNfa.spec.current_token = Tokens.EOS;
            MakeNfa.gen.Advance();

            // The first rule hangs directly off the head state.
            if (MakeNfa.spec.current_token != Tokens.END_OF_INPUT)
            {
                tail.Next = MakeNfa.rule();
                MakeNfa.ProcessStates(ruleStates, tail.Next);
            }

            // Every further rule gets its own sibling in the chain.
            while (MakeNfa.spec.current_token != Tokens.END_OF_INPUT)
            {
                ruleStates = MakeNfa.gen.GetStates();
                MakeNfa.gen.Advance();
                if (MakeNfa.spec.current_token == Tokens.END_OF_INPUT)
                {
                    break;
                }

                tail.Sibling = Alloc.NewNfa(MakeNfa.spec);
                tail         = tail.Sibling;
                tail.Next    = MakeNfa.rule();
                MakeNfa.ProcessStates(ruleStates, tail.Next);
            }

            // Pseudo-rule for the BOL and EOF markers.
            tail.Sibling = Alloc.NewNfa(MakeNfa.spec);
            tail         = tail.Sibling;
            tail.Next    = Alloc.NewNfa(MakeNfa.spec);

            Nfa pseudoRule = tail.Next;
            pseudoRule.Edge = '￾';
            pseudoRule.Next = Alloc.NewNfa(MakeNfa.spec);
            pseudoRule.SetCharSet(new CharSet());

            CharSet markers = pseudoRule.GetCharSet();
            markers.add((int)MakeNfa.spec.BOL);
            markers.add((int)MakeNfa.spec.EOF);

            pseudoRule.Next.SetAccept(new Accept(null));

            // Register the pseudo-rule in every per-state rule list.
            for (int stateIndex = 0; stateIndex < MakeNfa.spec.States.Count; stateIndex++)
            {
                List <Nfa> rulesForState = MakeNfa.spec.state_rules[stateIndex];
                rulesForState.Add(pseudoRule);
            }

            return head;
        }
Пример #5
0
        /*
         * Partitions the input alphabet into character classes.
         *
         * SimplifyNfa.ccls maps each original character code to a class id;
         * all entries start at the array default (class 0). For every labelled
         * NFA edge, any class that contains both characters accepted by the
         * edge and characters rejected by it is split: the accepted members
         * are moved to a newly numbered class. The final class count is stored
         * in SimplifyNfa.mapped_charset_size.
         *
         * NOTE(review): the two sentinel char literals in the edge filter
         * below look mis-encoded (one appears empty) in this copy of the file;
         * confirm them against the Nfa edge constants before building.
         */
        private static void computeClasses(Spec spec)
        {
            SimplifyNfa.original_charset_size = spec.dtrans_ncols;
            SimplifyNfa.ccls = new char[SimplifyNfa.original_charset_size];
            char   c       = '\u0001'; // next unused class id
            BitSet bitSet  = new BitSet(); // classes seen on the accepted side of the edge
            BitSet bitSet2 = new BitSet(); // classes seen on the rejected side of the edge
            Dictionary <char, char> dictionary = new Dictionary <char, char>(); // old class id -> new class id, per edge

            Console.WriteLine("Working on character classes.");
            for (int i = 0; i < spec.nfa_states.Count; i++)
            {
                Nfa nfa = spec.nfa_states[i];
                // Skip states whose edge is one of the two sentinel values (see NOTE above).
                if (nfa.Edge != '�' && nfa.Edge != '')
                {
                    bitSet.ClearAll();
                    bitSet2.ClearAll();
                    // Record, for each character code, on which side of this edge its class appears.
                    for (int j = 0; j < SimplifyNfa.ccls.Length; j++)
                    {
                        if ((int)nfa.Edge == j || (nfa.Edge == '￾' && nfa.GetCharSet().contains(j)))
                        {
                            bitSet.Set((int)SimplifyNfa.ccls[j], true);
                        }
                        else
                        {
                            bitSet2.Set((int)SimplifyNfa.ccls[j], true);
                        }
                    }
                    // After the AND, bitSet holds only the classes present on both sides.
                    bitSet.And(bitSet2);
                    // GetLength() != 0 is used as the "some class must be split" test.
                    if (bitSet.GetLength() != 0)
                    {
                        dictionary.Clear();
                        for (int k = 0; k < SimplifyNfa.ccls.Length; k++)
                        {
                            // Only characters accepted by the edge whose class is being split are moved.
                            if (bitSet.Get((int)SimplifyNfa.ccls[k]) && ((int)nfa.Edge == k || (nfa.Edge == '￾' && nfa.GetCharSet().contains(k))))
                            {
                                char c2 = SimplifyNfa.ccls[k];
                                if (!dictionary.ContainsKey(c2))
                                {
                                    // Decompiler temporaries: equivalent to dictionary.Add(c2, c++).
                                    Dictionary <char, char> arg_14F_0 = dictionary;
                                    char arg_14F_1 = c2;
                                    char expr_14A  = c;
                                    c = (char)(expr_14A + '\u0001');
                                    arg_14F_0.Add(arg_14F_1, expr_14A);
                                }
                                SimplifyNfa.ccls[k] = dictionary[c2];
                            }
                        }
                    }
                }
            }
            // c is one past the highest class id handed out, i.e. the number of classes.
            SimplifyNfa.mapped_charset_size = (int)c;
        }
Пример #6
0
 /*
  * Rewrites the labelled edges of every NFA state in spec through the
  * SimplifyNfa.ccls table built by computeClasses, then stores that table
  * and SimplifyNfa.mapped_charset_size on spec.
  *
  * NOTE(review): the two sentinel char literals in the edge filter below
  * look mis-encoded (one appears empty) in this copy of the file; confirm
  * them against the Nfa edge constants before building.
  */
 internal static void simplify(Spec spec)
 {
     SimplifyNfa.computeClasses(spec);
     for (int i = 0; i < spec.nfa_states.Count; i++)
     {
         Nfa nfa = spec.nfa_states[i];
         // States whose edge is one of the two sentinel values are left unchanged.
         if (nfa.Edge != '�' && nfa.Edge != '')
         {
             if (nfa.Edge == '￾')
             {
                 // Character-set edge: replace the set by one mapped through ccls.
                 CharSet charSet = new CharSet();
                 charSet.map(nfa.GetCharSet(), SimplifyNfa.ccls);
                 nfa.SetCharSet(charSet);
             }
             else
             {
                 // Single-character edge: replace the character by its ccls entry.
                 nfa.Edge = SimplifyNfa.ccls[(int)nfa.Edge];
             }
         }
     }
     // Hand the mapping and the reduced charset size back to the spec.
     spec.ccls_map     = SimplifyNfa.ccls;
     spec.dtrans_ncols = SimplifyNfa.mapped_charset_size;
 }
Пример #7
0
/*
 * Compute minimum set of character classes needed to disambiguate
 * edges.  We optimistically assume that every character belongs to
 * a single character class, and then incrementally split classes
 * as we see edges that require discrimination between characters in
 * the class.
 */
        static private void computeClasses(Spec spec)
        {
            original_charset_size = spec.dtrans_ncols;
            ccls = new int[original_charset_size]; // every character starts out in class 0

            int       classCount = 1;               // next unused class id
            BitSet    onEdge     = new BitSet();    // classes with a character accepted by the edge
            BitSet    offEdge    = new BitSet();    // classes with a character rejected by the edge
            Hashtable renamed    = new Hashtable(); // old class id -> new class id, per edge

            Console.WriteLine("Working on character classes.");
            for (int stateIndex = 0; stateIndex < spec.nfa_states.Count; stateIndex++)
            {
                Nfa state = (Nfa)spec.nfa_states[stateIndex];

                // Unlabelled edges tell us nothing about how to split classes.
                bool labelled = state.GetEdge() != Nfa.EMPTY && state.GetEdge() != Nfa.EPSILON;
                if (!labelled)
                {
                    continue;
                }

                onEdge.ClearAll();
                offEdge.ClearAll();
                for (int ch = 0; ch < ccls.Length; ch++)
                {
                    // Accepted when the edge is exactly this character, or is a
                    // character-set edge whose set contains it.
                    bool accepted = state.GetEdge() == ch
                                    || (state.GetEdge() == Nfa.CCL && state.GetCharSet().contains(ch));
                    BitSet side = accepted ? onEdge : offEdge;
                    side.Set(ccls[ch], true);
                }

                // A class needs splitting exactly when it shows up on both sides.
                onEdge.And(offEdge);
                bool needsSplit = onEdge.GetLength() != 0;
                Console.Write(needsSplit ? ":" : ".");
                if (!needsSplit)
                {
                    continue;
                }

                // Move the accepted members of each split class into a new class.
                renamed.Clear();
                for (int ch = 0; ch < ccls.Length; ch++)
                {
                    int currentClass = ccls[ch];
                    if (!onEdge.Get(currentClass))
                    {
                        continue; // class is not being split
                    }

                    bool accepted = state.GetEdge() == ch
                                    || (state.GetEdge() == Nfa.CCL && state.GetCharSet().contains(ch));
                    if (!accepted)
                    {
                        continue; // rejected members keep the old class id
                    }

                    if (!renamed.ContainsKey(currentClass))
                    {
                        renamed.Add(currentClass, classCount++); // first member seen: open a new class
#if DEBUG
                        Console.WriteLine("Adding char " + (classCount - 1) + " split=" + currentClass + " i=" + ch);
#endif
                    }
                    ccls[ch] = (int)renamed[currentClass];
                }
            }

            Console.WriteLine();
            Console.WriteLine("NFA has " + classCount + " distinct character classes.");
            mapped_charset_size = classCount;
        }
Пример #8
0
/*
 * Function: machine
 * Description: Recursive descent regular expression parser.
 */
        private static Nfa machine()
        {
#if DESCENT_DEBUG
            Utility.enter("machine", spec.lexeme, spec.current_token);
#endif

            // head anchors the sibling chain of rules; tail is its last entry.
            Nfa head = Alloc.NewNfa(spec);
            Nfa tail = head;

            BitSet ruleStates = gen.GetStates();

            // State handling: seed the token, then read the first real one.
            spec.current_token = Gen.EOS;
            gen.Advance();

            // The first rule hangs directly off the head state.
            if (spec.current_token != Gen.END_OF_INPUT)
            {
                tail.SetNext(rule());
                ProcessStates(ruleStates, tail.GetNext());
            }

            // Every further rule gets its own sibling in the chain.
            while (spec.current_token != Gen.END_OF_INPUT)
            {
                ruleStates = gen.GetStates();

                gen.Advance();
                if (spec.current_token == Gen.END_OF_INPUT)
                {
                    break;
                }

                tail.SetSib(Alloc.NewNfa(spec));
                tail = tail.GetSib();
                tail.SetNext(rule());

                ProcessStates(ruleStates, tail.GetNext());
            }

            // Pseudo-rule matching the BOL and EOF markers.
            tail.SetSib(Alloc.NewNfa(spec));
            tail = tail.GetSib();
            tail.SetNext(Alloc.NewNfa(spec));

            Nfa pseudoRule = tail.GetNext();
            pseudoRule.SetEdge(Nfa.CCL);
            pseudoRule.SetNext(Alloc.NewNfa(spec));
            pseudoRule.SetCharSet(new CharSet());

            CharSet markers = pseudoRule.GetCharSet();
            markers.add(spec.BOL);
            markers.add(spec.EOF);

            // Its accept state carries a null action.
            pseudoRule.GetNext().SetAccept(new Accept(null, input.line_number + 1));

            // Register the pseudo-rule in every per-state rule list.
            for (int stateIndex = 0; stateIndex < spec.states.Count; stateIndex++)
            {
                ArrayList rulesForState = spec.state_rules[stateIndex];
                rulesForState.Add(pseudoRule);
            }

#if DESCENT_DEBUG
            Utility.leave("machine", spec.lexeme, spec.current_token);
#endif

            return head;
        }
Пример #9
0
 /*
  * Parses a single term of a regular expression into pair.
  *
  *   - "( expr )": delegates to MakeNfa.expr and requires a closing
  *     parenthesis, reporting Errors.SYNTAX otherwise.
  *   - anything else: allocates a start state and an end state joined by one
  *     edge. The edge is the literal token value, unless the token is ANY,
  *     opens a character class, or is a letter while spec.IgnoreCase is set;
  *     in those cases the edge is a character-set edge.
  *
  * Character-class names are lower-cased with the invariant culture before
  * lookup, so the result does not depend on the machine's locale (under
  * Turkish/Azeri cultures ToLower() maps 'I' to a dotless i).
  */
 private static void term(NfaPair pair)
 {
     if (Tokens.OPEN_PAREN == MakeNfa.spec.current_token)
     {
         MakeNfa.gen.Advance();
         MakeNfa.expr(pair);
         if (Tokens.CLOSE_PAREN != MakeNfa.spec.current_token)
         {
             Error.ParseError(Errors.SYNTAX, MakeNfa.gen.InputFilePath, MakeNfa.input.line_number);
             return;
         }
         MakeNfa.gen.Advance();
         return;
     }

     Nfa nfa = Alloc.NewNfa(MakeNfa.spec);
     pair.start = nfa;
     nfa.Next   = Alloc.NewNfa(MakeNfa.spec);
     pair.end   = nfa.Next;

     bool isLetter = MakeNfa.spec.current_token == Tokens.LETTER && char.IsLetter(MakeNfa.spec.current_token_value);
     bool foldCase = MakeNfa.spec.IgnoreCase && isLetter;
     bool isAny    = MakeNfa.spec.current_token == Tokens.ANY;
     bool isClass  = MakeNfa.spec.current_token == Tokens.CCL_START;

     // Plain single-character edge.
     if (!isAny && !isClass && !foldCase)
     {
         nfa.Edge = MakeNfa.spec.current_token_value;
         MakeNfa.gen.Advance();
         return;
     }

     // Everything else is expressed as a character-set edge.
     nfa.Edge = '￾';
     nfa.SetCharSet(new CharSet());
     CharSet charSet = nfa.GetCharSet();

     if (foldCase)
     {
         // Letter under IgnoreCase: add it via the case-insensitive helper.
         charSet.addncase(MakeNfa.spec.current_token_value);
     }
     else if (isAny)
     {
         // ANY: codes 10, 13, BOL and EOF are added, then the set is complemented.
         charSet.add(10);
         charSet.add(13);
         charSet.add((int)MakeNfa.spec.BOL);
         charSet.add((int)MakeNfa.spec.EOF);
         charSet.complement();
     }
     else
     {
         // Character class: step past the opening token, then inspect what follows.
         MakeNfa.gen.Advance();
         if (MakeNfa.spec.current_token == Tokens.CHAR_CLASS)
         {
             MakeNfa.gen.Advance();
             // Invariant lower-casing keeps the class-name lookup locale-independent.
             if (!charSet.AddClass(MakeNfa.spec.class_name.ToLowerInvariant()))
             {
                 Error.ParseError(Errors.InvalidCharClass, MakeNfa.gen.InputFilePath, MakeNfa.input.line_number);
             }
         }
         else if (MakeNfa.spec.current_token == Tokens.AT_BOL)
         {
             // Negated class: BOL and EOF are added, then the set is complemented
             // before the listed members are read.
             MakeNfa.gen.Advance();
             charSet.add((int)MakeNfa.spec.BOL);
             charSet.add((int)MakeNfa.spec.EOF);
             charSet.complement();
         }

         if (MakeNfa.spec.current_token != Tokens.CCL_END)
         {
             MakeNfa.dodash(charSet);
         }
     }

     // Consume the token that ended this term.
     MakeNfa.gen.Advance();
 }