Exemplo n.º 1
0
        /***************************************************************
         * Function: emit
         * Description: Public entry point of this module.  Binds the
         * given spec and output writer through Set, runs the emit_*
         * stages in the order header, construct, helpers, driver,
         * footer, and finally clears the bound members with reset.
         **************************************************************/
        public void emit(CSpec spec, System.IO.TextWriter outstream)
        {
            /* Bind the arguments for the duration of this run. */
            Set(spec, outstream);

            /* Debug builds double-check that both members are bound. */
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(m_spec != null);
                CUtility.ASSERT(m_outstream != null);
            }

            /* Legacy diagnostic dump, only when explicitly enabled. */
            if (CUtility.OLD_DEBUG)
            {
                print_details();
            }

            /* Produce the output one section at a time. */
            emit_header();
            emit_construct();
            emit_helpers();
            emit_driver();
            emit_footer();

            /* Let go of the spec and the writer again. */
            reset();
        }
        private int original_charset_size; // original charset size

        #endregion Fields

        #region Methods

        /***************************************************************
         * Function: simplify
         * Description: Computes the character-class table for the NFA
         * held by the given spec (computeClasses) and relabels every
         * edge that is neither EMPTY nor EPSILON through that table.
         * The table and the reduced column count are then stored in
         * the spec (m_ccls_map, m_dtrans_ncols).
         **************************************************************/
        public void simplify(CSpec m_spec)
        {
            /* Fills in ccls and mapped_charset_size. */
            computeClasses(m_spec);

            /* Rewrite each NFA edge in terms of character classes. */
            for (IEnumerator states = m_spec.m_nfa_states.elements(); states.MoveNext();)
            {
                CNfa state = (CNfa)states.Current;

                /* EMPTY and EPSILON edges carry no character: skip them. */
                if (state.m_edge != CNfa.EMPTY && state.m_edge != CNfa.EPSILON)
                {
                    if (state.m_edge == CNfa.CCL)
                    {
                        /* Set-labelled edge: replace the set by its mapped image. */
                        CSet mapped = new CSet();
                        mapped.map(state.m_set, ccls);
                        state.m_set = mapped;
                    }
                    else
                    {
                        /* Single-character edge: replace it by its class index. */
                        state.m_edge = ccls[state.m_edge];
                    }
                }
            }

            /* Publish the mapping and the new column count. */
            m_spec.m_ccls_map = ccls;
            m_spec.m_dtrans_ncols = mapped_charset_size;
        }
Exemplo n.º 3
0
 /***************************************************************
  * Function: reset
  * Description: Releases the references held in m_spec and
  * m_outstream by setting both members to null.
  **************************************************************/
 private void reset()
 {
     m_outstream = null;
     m_spec      = null;
 }
Exemplo n.º 4
0
        private int mapped_charset_size;   // reduced charset size

        /***************************************************************
         * Function: simplify
         * Description: Runs computeClasses on the given spec, then
         * walks all NFA states and rewrites each character-carrying
         * edge (single character or CCL set) through the resulting
         * ccls table.  Finally stores the table and the reduced column
         * count in the spec.
         **************************************************************/
        public void simplify(CSpec m_spec)
        {
            /* Populates ccls and mapped_charset_size. */
            computeClasses(m_spec);

            /* Rewrite the NFA using the character-class mapping. */
            for (IEnumerator cursor = m_spec.m_nfa_states.elements(); cursor.MoveNext();)
            {
                CNfa node = (CNfa)cursor.Current;

                /* Only edges other than EMPTY / EPSILON need remapping. */
                if (node.m_edge != CNfa.EMPTY && node.m_edge != CNfa.EPSILON)
                {
                    if (node.m_edge == CNfa.CCL)
                    {
                        /* Character-set edge: map the whole set. */
                        CSet remapped = new CSet();
                        remapped.map(node.m_set, ccls);
                        node.m_set = remapped;
                    }
                    else
                    {
                        /* Single character: look up its class. */
                        node.m_edge = ccls[node.m_edge];
                    }
                }
            }

            /* Record the mapping in the spec. */
            m_spec.m_ccls_map = ccls;
            m_spec.m_dtrans_ncols = mapped_charset_size;
        }
        /***************************************************************
         * Function: newNLPair
         * Description: return a new CNfaPair that matches a new
         *             line: (\r\n?|[\n\u2028\u2029])
         *     Added by CSA 8-Aug-1999, updated 10-Aug-1999
         *
         * The states are created through newCNfa in the order: end,
         * start, character-class state, '\r' state, state after '\r',
         * '\n' state.
         *
         * U+2028 and U+2029 are only added when the character set
         * (m_dtrans_ncols minus the pseudo characters) is large enough
         * to contain them.  The code points are written in hexadecimal;
         * the decimal values 2028/2029 would denote U+07EC/U+07ED.
         **************************************************************/
        public static CNfaPair newNLPair(CSpec spec)
        {
            CNfaPair pair = newCNfaPair();

            pair.m_end   = newCNfa(spec); // newline accepting state
            pair.m_start = newCNfa(spec); // new state with two epsilon edges

            /* First alternative: a single character out of [\n\u2028\u2029]. */
            CNfa lineBreakSet = newCNfa(spec);
            pair.m_start.m_next = lineBreakSet;
            lineBreakSet.m_edge = CNfa.CCL;
            lineBreakSet.m_set  = new CSet();
            lineBreakSet.m_set.add('\n');
            if (spec.m_dtrans_ncols - CSpec.NUM_PSEUDO > 0x2029)
            {
                lineBreakSet.m_set.add(0x2028); /* U+2028 is LS, the line separator */
                lineBreakSet.m_set.add(0x2029); /* U+2029 is PS, the paragraph sep. */
            }
            lineBreakSet.m_next = pair.m_end;   // accept '\n', U+2028, or U+2029

            /* Second alternative: '\r' optionally followed by '\n'. */
            CNfa carriageReturn = newCNfa(spec);
            pair.m_start.m_next2  = carriageReturn;
            carriageReturn.m_edge = '\r';

            CNfa afterReturn = newCNfa(spec);
            carriageReturn.m_next = afterReturn;
            afterReturn.m_next    = pair.m_end; // accept '\r'

            CNfa lineFeed = newCNfa(spec);
            afterReturn.m_next2 = lineFeed;
            lineFeed.m_edge     = '\n';
            lineFeed.m_next     = pair.m_end;   // accept '\r\n'

            return pair;
        }
Exemplo n.º 6
0
 /***************************************************************
   Function: allocate_BOL_EOF
   Description: Reserves two further column indices at the end of
   the spec's column range.  The first is stored in spec.BOL, the
   second in spec.EOF; spec.m_dtrans_ncols grows by two.
   **************************************************************/
 public void allocate_BOL_EOF(CSpec spec)
 {
     /* The code below allocates exactly two pseudo characters. */
     CUtility.ASSERT(2 == CSpec.NUM_PSEUDO);

     int bolIndex = spec.m_dtrans_ncols++;
     spec.BOL = bolIndex;

     int eofIndex = spec.m_dtrans_ncols++;
     spec.EOF = eofIndex;
 }
Exemplo n.º 7
0
 /***************************************************************
  * Function: reset
  * Description: Returns the members to their unbound state:
  * m_lexGen and m_spec become null, m_unmarked_dfa becomes 0.
  **************************************************************/
 private void reset()
 {
     m_unmarked_dfa = 0;
     m_spec         = null;
     m_lexGen       = null;
 }
 /***************************************************************
  * Function: reset
  * Description: Clears the CMakeNfa members m_input, m_lexGen
  * and m_spec by setting each of them to null.
  **************************************************************/
 private void reset()
 {
     m_spec   = null;
     m_lexGen = null;
     m_input  = null;
 }
Exemplo n.º 9
0
 /***************************************************************
  * Function: reset
  * Description: Clears the members m_spec, m_group and m_ingroup
  * by setting each of them to null.
  **************************************************************/
 private void reset()
 {
     m_ingroup = null;
     m_group   = null;
     m_spec    = null;
 }
 /***************************************************************
  * Function: allocate_BOL_EOF
  * Description: Extends the spec's column range by two indices.
  * The first new index is recorded in spec.BOL and the second in
  * spec.EOF; spec.m_dtrans_ncols is incremented once for each.
  **************************************************************/
 public void allocate_BOL_EOF(CSpec spec)
 {
     /* Exactly two pseudo characters are allocated below. */
     CUtility.ASSERT(2 == CSpec.NUM_PSEUDO);

     int bolIndex = spec.m_dtrans_ncols++;
     spec.BOL = bolIndex;

     int eofIndex = spec.m_dtrans_ncols++;
     spec.EOF = eofIndex;
 }
Exemplo n.º 11
0
 /***************************************************************
  * Function: Set
  * Description: Binds the generator and spec this object works
  * on (m_lexGen, m_spec) and restarts m_unmarked_dfa at 0.
  **************************************************************/
 private void Set(CLexGen lexGen, CSpec spec)
 {
     m_unmarked_dfa = 0;
     m_spec         = spec;
     m_lexGen       = lexGen;
 }
Exemplo n.º 12
0
 /*************************************************************
   Function: CDTrans (constructor)
   Description: Creates a transition row carrying the given label.
   m_dtrans gets one int slot per column of the spec
   (spec.m_dtrans_ncols); m_accept starts out null and m_anchor
   starts out as CSpec.NONE.
   ***********************************************************/
 public CDTrans(int label, CSpec spec)
 {
     m_dtrans = new int[spec.m_dtrans_ncols];
     m_label  = label;
     m_anchor = CSpec.NONE;
     m_accept = null;
 }
Exemplo n.º 13
0
 /*************************************************************
  * Function: CDTrans (constructor)
  * Description: Builds a transition row with the given label.
  * The m_dtrans array is sized to spec.m_dtrans_ncols entries;
  * m_accept is initialised to null and m_anchor to CSpec.NONE.
  ***********************************************************/
 public CDTrans(int label, CSpec spec)
 {
     m_dtrans = new int[spec.m_dtrans_ncols];
     m_label  = label;
     m_anchor = CSpec.NONE;
     m_accept = null;
 }
Exemplo n.º 14
0
        /***************************************************************
          Function: newCDfa
          Description: Creates a CDfa whose constructor argument is the
          current size of spec.m_dfa_states, appends it to that
          collection and returns it.
          **************************************************************/
        public static CDfa newCDfa(CSpec spec)
        {
            /* The size before insertion is handed to the new state. */
            int label = spec.m_dfa_states.size();

            CDfa created = new CDfa(label);
            spec.m_dfa_states.addElement(created);
            return created;
        }
Exemplo n.º 15
0
        /***************************************************************
         * Function: newCDfa
         * Description: Allocates a CDfa, passing the number of states
         * already in spec.m_dfa_states to its constructor, registers
         * the new state in that collection and returns it.
         **************************************************************/
        public static CDfa newCDfa(CSpec spec)
        {
            /* Taken before the new state is added. */
            int label = spec.m_dfa_states.size();

            CDfa created = new CDfa(label);
            spec.m_dfa_states.addElement(created);
            return created;
        }
        /***************************************************************
         * Function: thompson
         * Description: High level access function to module.
         * Binds the arguments, allocates one empty Vector per entry of
         * m_spec.m_states in m_spec.m_state_rules, builds the NFA with
         * machine() and stores its start state in m_spec.m_nfa_start,
         * then labels every NFA state with its index in
         * m_spec.m_nfa_states.  The bound members are cleared again
         * before returning.
         **************************************************************/
        public void thompson(CLexGen lexGen, CSpec spec, CInput input)
        {
            /* Start from a clean slate, then bind the arguments. */
            reset();
            Set(lexGen, spec, input);

            /* One rule vector per lexical state. */
            int stateCount = m_spec.m_states.Count;
            m_spec.m_state_rules = new Vector[stateCount];
            for (int s = 0; s < stateCount; s++)
            {
                m_spec.m_state_rules[s] = new Vector();
            }

            /* Build the NFA and remember where it starts. */
            m_spec.m_nfa_start = machine();

            /* Label each NFA state with its position in the state list. */
            int nfaCount = m_spec.m_nfa_states.size();
            for (int n = 0; n < nfaCount; n++)
            {
                CNfa state = (CNfa)m_spec.m_nfa_states.elementAt(n);
                state.m_label = n;
            }

            /* Debugging output. */
            if (CUtility.DO_DEBUG)
            {
                m_lexGen.print_nfa();
            }

            /* Note: the figure reported is the state count plus one. */
            if (m_spec.m_verbose)
            {
                System.Console.WriteLine("NFA comprised of "
                    + (m_spec.m_nfa_states.Count + 1)
                    + " states.");
            }

            reset();
        }
Exemplo n.º 17
0
        /***************************************************************
         * Function: Set
         * Description: Binds the spec to work on and clears m_group
         * and m_ingroup.  In debug builds a null spec trips an
         * assertion first.
         **************************************************************/
        private void Set(CSpec spec)
        {
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(spec != null);
            }

            m_ingroup = null;
            m_group   = null;
            m_spec    = spec;
        }
Exemplo n.º 18
0
        /***************************************************************
         * Function: Set
         * Description: Stores the spec and the output writer in
         * m_spec and m_outstream.  In debug builds both arguments are
         * asserted to be non-null first.
         **************************************************************/
        private void Set(CSpec spec, TextWriter outstream)
        {
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(spec != null);
                CUtility.ASSERT(outstream != null);
            }

            m_outstream = outstream;
            m_spec      = spec;
        }
Exemplo n.º 19
0
        /***************************************************************
          Function: min_dfa
          Description: High-level access function to module.  Binds the
          spec, runs minimize() followed by reduce(), then clears the
          bound members again.
          **************************************************************/
        public void min_dfa(CSpec spec)
        {
            Set(spec);

            minimize(); /* Remove redundant states. */
            reduce();   /* Column and row compression; accept states are
                           saved in an auxiliary vector. */

            reset();
        }
Exemplo n.º 20
0
        /***************************************************************
          Function: newCNfa
          Description: Creates a CNfa state, appends it to
          spec.m_nfa_states, sets its edge to CNfa.EPSILON and returns
          it.  m_label is not assigned here.
          **************************************************************/
        public static CNfa newCNfa(CSpec spec)
        {
            /* UNDONE: Buffer this? */
            CNfa state = new CNfa();

            spec.m_nfa_states.addElement(state);
            state.m_edge = CNfa.EPSILON;

            return state;
        }
Exemplo n.º 21
0
        /***************************************************************
         * Function: min_dfa
         * Description: High-level access function to module.  Binds
         * the spec via Set, calls minimize() and then reduce(), and
         * finally releases the bound members via reset().
         **************************************************************/
        public void min_dfa(CSpec spec)
        {
            Set(spec);

            minimize(); /* Remove redundant states. */
            reduce();   /* Column and row compression; accept states are
                         * saved in an auxiliary vector. */

            reset();
        }
Exemplo n.º 22
0
        /***************************************************************
         * Function: newCNfa
         * Description: Allocates a fresh CNfa state, registers it in
         * spec.m_nfa_states, marks its edge as CNfa.EPSILON and hands
         * it back.  m_label is left untouched by this function.
         **************************************************************/
        public static CNfa newCNfa(CSpec spec)
        {
            /* UNDONE: Buffer this? */
            CNfa state = new CNfa();

            spec.m_nfa_states.addElement(state);
            state.m_edge = CNfa.EPSILON;

            return state;
        }
        /** Compute the minimum set of character classes needed to
         *  disambiguate edges.  Every character starts out in class 0;
         *  for each character-carrying NFA edge, any class containing
         *  both characters on the edge and characters off the edge is
         *  split by moving the on-edge characters into a newly numbered
         *  class.  Results: ccls maps character to class index and
         *  mapped_charset_size holds the number of classes.  Progress
         *  is written to the console ('.' = no split, ':' = split).
         *  [CSA, 25-Jul-1999] */
        private void computeClasses(CSpec m_spec)
        {
            this.original_charset_size = m_spec.m_dtrans_ncols;
            this.ccls = new int[original_charset_size]; // all characters start in class 0

            int classCount = 1; // number of classes handed out so far
            SparseBitSet onEdge = new SparseBitSet();
            SparseBitSet offEdge = new SparseBitSet();
            Hashtable renamed = new Hashtable();

            System.Console.Write("Working on character classes.");
            for (IEnumerator states = m_spec.m_nfa_states.elements(); states.MoveNext();)
            {
                CNfa state = (CNfa)states.Current;
                if (state.m_edge == CNfa.EMPTY || state.m_edge == CNfa.EPSILON)
                {
                    continue; // edge tells us nothing about characters
                }

                // Collect the classes seen on either side of this edge.
                onEdge.clearAll();
                offEdge.clearAll();
                for (int c = 0; c < ccls.Length; c++)
                {
                    bool matched = state.m_edge == c
                                   || (state.m_edge == CNfa.CCL && state.m_set.contains(c));
                    if (matched)
                    {
                        onEdge.Set(ccls[c]);
                    }
                    else
                    {
                        offEdge.Set(ccls[c]);
                    }
                }

                // Classes present on both sides are the ones to split.
                onEdge.and(offEdge);
                bool nothingToSplit = onEdge.size() == 0;
                System.Console.Write(nothingToSplit ? "." : ":");
                if (nothingToSplit)
                {
                    continue;
                }

                // Move the on-edge members of each split class to a new class.
                renamed.Clear(); // old class index -> new class index
                for (int c = 0; c < ccls.Length; c++)
                {
                    if (!onEdge.Get(ccls[c]))
                    {
                        continue; // class is not being split
                    }
                    if (!(state.m_edge == c
                          || (state.m_edge == CNfa.CCL && state.m_set.contains(c))))
                    {
                        continue; // character stays in the old class
                    }

                    int oldClass = ccls[c];
                    if (!renamed.ContainsKey(oldClass))
                    {
                        renamed.Add(oldClass, (classCount++)); // allocate a new class
                    }
                    ccls[c] = (int)renamed[oldClass];
                }
            }
            System.Console.WriteLine();
            System.Console.WriteLine("NFA has " + classCount + " distinct character classes.");

            this.mapped_charset_size = classCount;
        }
        /***************************************************************
         * Function: Set
         * Description: Binds the CMakeNfa members m_input, m_lexGen
         * and m_spec to the given arguments.  In debug builds each
         * argument is asserted to be non-null first.
         **************************************************************/
        private void Set(CLexGen lexGen, CSpec spec, CInput input)
        {
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(input != null);
                CUtility.ASSERT(lexGen != null);
                CUtility.ASSERT(spec != null);
            }

            m_spec   = spec;
            m_lexGen = lexGen;
            m_input  = input;
        }
Exemplo n.º 25
0
        /***************************************************************
         * Function: make_dfa
         * Description: High-level access function to module.  Clears
         * and rebinds the members, then calls make_dtrans(),
         * free_nfa_states() and free_dfa_states() in that order.  When
         * both m_spec.m_verbose and CUtility.OLD_DUMP_DEBUG are set,
         * the DFA state count is printed before free_dfa_states().
         **************************************************************/
        public void make_dfa(CLexGen lexGen, CSpec spec)
        {
            reset();
            Set(lexGen, spec);

            make_dtrans();
            free_nfa_states();

            if (m_spec.m_verbose)
            {
                if (CUtility.OLD_DUMP_DEBUG == true)
                {
                    System.Console.WriteLine(m_spec.m_dfa_states.size()
                        + " DFA states in original machine.");
                }
            }

            free_dfa_states();
        }
Exemplo n.º 26
0
 /***************************************************************
   Function: reset
   Description: Sets m_spec and m_outstream to null so that no
   reference to either object is kept.
   **************************************************************/
 private void reset()
 {
     m_outstream = null;
     m_spec = null;
 }
Exemplo n.º 27
0
        /***************************************************************
          Function: emit
          Description: High-level access function to module.  Binds
          the spec and output writer with Set, calls emit_header,
          emit_construct, emit_helpers, emit_driver and emit_footer in
          that order, and clears the bound members with reset.
          **************************************************************/
        public void emit(CSpec spec, System.IO.TextWriter outstream)
        {
            /* Bind the arguments for this run. */
            Set(spec, outstream);

            /* Debug builds verify that both members are bound. */
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(m_spec != null);
                CUtility.ASSERT(m_outstream != null);
            }

            /* Legacy diagnostic dump. */
            if (CUtility.OLD_DEBUG)
            {
                print_details();
            }

            /* Write the output section by section. */
            emit_header();
            emit_construct();
            emit_helpers();
            emit_driver();
            emit_footer();

            /* Release the spec and the writer. */
            reset();
        }
Exemplo n.º 28
0
        /***************************************************************
          Function: Set
          Description: Stores the given spec in m_spec and sets m_group
          and m_ingroup to null.  In debug builds the spec is asserted
          to be non-null first.
          **************************************************************/
        private void Set(CSpec spec)
        {
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(spec != null);
            }

            m_ingroup = null;
            m_group = null;
            m_spec = spec;
        }
Exemplo n.º 29
0
        /***************************************************************
          Function: Set
          Description: Binds m_spec and m_outstream to the given spec
          and output writer.  In debug builds both arguments are
          asserted to be non-null first.
          **************************************************************/
        private void Set(CSpec spec, TextWriter outstream)
        {
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(spec != null);
                CUtility.ASSERT(outstream != null);
            }

            m_outstream = outstream;
            m_spec = spec;
        }
Exemplo n.º 30
0
        /***************************************************************
          Function: CLexGen
          Description: Opens `filename` for reading and
          `filename + ".cs"` for writing, then creates the input
          buffer, the token lookup table, the spec object and the
          processing stages.  m_init_flag is false while construction
          is in progress and becomes true as the very last step.

          The stream constructors report failure by throwing, never by
          returning null, so open failures reach the caller as
          exceptions.  If the output file cannot be opened, the input
          stream opened just before is closed before the exception is
          rethrown, so no file handle is leaked.
          **************************************************************/
        public CLexGen(
            string filename
            )
        {
            /* Successful initialization flag. */
            m_init_flag = false;

            /* Open input stream. */
            m_instream = new StreamReader(filename);

            /* Open output stream; release the input stream on failure. */
            try
            {
                m_outstream = new StreamWriter(filename + ".cs");
            }
            catch
            {
                m_instream.Close();
                m_instream = null;
                throw;
            }

            /* Create input buffer class. */
            m_input = new CInput(m_instream);

            /* Initialize character hash table: regular-expression
               metacharacter -> token code. */
            m_tokens = new Hashtable();
            m_tokens.Add('$', AT_EOL);
            m_tokens.Add('(', OPEN_PAREN);
            m_tokens.Add(')', CLOSE_PAREN);
            m_tokens.Add('*', CLOSURE);
            m_tokens.Add('+', PLUS_CLOSE);
            m_tokens.Add('-', DASH);
            m_tokens.Add('.', ANY);
            m_tokens.Add('?', OPTIONAL);
            m_tokens.Add('[', CCL_START);
            m_tokens.Add(']', CCL_END);
            m_tokens.Add('^', AT_BOL);
            m_tokens.Add('{', OPEN_CURLY);
            m_tokens.Add('|', OR);
            m_tokens.Add('}', CLOSE_CURLY);

            /* Initialize spec structure. */
            m_spec = new CSpec(this);

            /* Processing stages. */
            m_nfa2dfa = new CNfa2Dfa();
            m_minimize = new CMinimize();
            m_makeNfa = new CMakeNfa();
            m_simplifyNfa = new CSimplifyNfa();

            m_emit = new CEmit();

            /* Successful initialization flag. */
            m_init_flag = true;
        }
Exemplo n.º 31
0
 /***************************************************************
   Function: reset
   Description: Sets the members m_spec, m_group and m_ingroup to
   null.
   **************************************************************/
 private void reset()
 {
     m_ingroup = null;
     m_group = null;
     m_spec = null;
 }
Exemplo n.º 32
0
        /** Compute the minimum set of character classes needed to
         *  disambiguate edges.  All characters begin in class 0.  For
         *  every NFA edge that carries characters, each class that has
         *  members both on and off the edge is split: its on-edge
         *  members are moved to a freshly numbered class.  On return
         *  ccls maps each character to its class and
         *  mapped_charset_size is the number of classes.  One progress
         *  character per edge is written to the console ('.' when
         *  nothing was split, ':' otherwise).
         *  [CSA, 25-Jul-1999] */
        private void computeClasses(CSpec m_spec)
        {
            this.original_charset_size = m_spec.m_dtrans_ncols;
            this.ccls = new int[original_charset_size]; // all characters start in class 0

            int classCount = 1; // number of classes handed out so far
            SparseBitSet onEdge = new SparseBitSet();
            SparseBitSet offEdge = new SparseBitSet();
            Hashtable renamed = new Hashtable();

            System.Console.Write("Working on character classes.");
            for (IEnumerator cursor = m_spec.m_nfa_states.elements(); cursor.MoveNext();)
            {
                CNfa node = (CNfa)cursor.Current;
                if (node.m_edge == CNfa.EMPTY || node.m_edge == CNfa.EPSILON)
                {
                    continue; // no discriminatory information
                }

                // Record which classes occur on and off this edge.
                onEdge.clearAll();
                offEdge.clearAll();
                for (int c = 0; c < ccls.Length; c++)
                {
                    bool matched = node.m_edge == c
                                   || (node.m_edge == CNfa.CCL && node.m_set.contains(c));
                    if (matched)
                    {
                        onEdge.Set(ccls[c]);
                    }
                    else
                    {
                        offEdge.Set(ccls[c]);
                    }
                }

                // Only classes appearing on both sides need splitting.
                onEdge.and(offEdge);
                bool nothingToSplit = onEdge.size() == 0;
                System.Console.Write(nothingToSplit ? "." : ":");
                if (nothingToSplit)
                {
                    continue;
                }

                // Give the on-edge members of each split class a new class.
                renamed.Clear(); // old class index -> new class index
                for (int c = 0; c < ccls.Length; c++)
                {
                    if (!onEdge.Get(ccls[c]))
                    {
                        continue; // this class is not split
                    }
                    if (!(node.m_edge == c
                          || (node.m_edge == CNfa.CCL && node.m_set.contains(c))))
                    {
                        continue; // off-edge member keeps the old class
                    }

                    int oldClass = ccls[c];
                    if (!renamed.ContainsKey(oldClass))
                    {
                        renamed.Add(oldClass, (classCount++)); // allocate a new class
                    }
                    ccls[c] = (int)renamed[oldClass];
                }
            }
            System.Console.WriteLine();
            System.Console.WriteLine("NFA has " + classCount + " distinct character classes.");

            this.mapped_charset_size = classCount;
        }
Exemplo n.º 33
0
 /***************************************************************
   Function: reset
   Description: Sets the CMakeNfa members m_input, m_lexGen and
   m_spec to null.
   **************************************************************/
 private void reset()
 {
     m_spec = null;
     m_lexGen = null;
     m_input = null;
 }
Exemplo n.º 34
0
 /***************************************************************
   Function: newNLPair
   Description: return a new CNfaPair that matches a new
                line: (\r\n?|[\n\u2028\u2029])
        Added by CSA 8-Aug-1999, updated 10-Aug-1999

   The states are created through newCNfa in the order: end,
   start, character-class state, '\r' state, state after '\r',
   '\n' state.

   U+2028 and U+2029 are only added when the character set
   (m_dtrans_ncols minus the pseudo characters) is large enough to
   contain them.  The code points are written in hexadecimal; the
   decimal values 2028/2029 would denote U+07EC/U+07ED.
   **************************************************************/
 public static CNfaPair newNLPair(CSpec spec)
 {
     CNfaPair pair = newCNfaPair();
     pair.m_end = newCNfa(spec);   // newline accepting state
     pair.m_start = newCNfa(spec); // new state with two epsilon edges

     /* First alternative: a single character out of [\n\u2028\u2029]. */
     CNfa lineBreakSet = newCNfa(spec);
     pair.m_start.m_next = lineBreakSet;
     lineBreakSet.m_edge = CNfa.CCL;
     lineBreakSet.m_set = new CSet();
     lineBreakSet.m_set.add('\n');
     if (spec.m_dtrans_ncols - CSpec.NUM_PSEUDO > 0x2029)
     {
         lineBreakSet.m_set.add(0x2028); /* U+2028 is LS, the line separator */
         lineBreakSet.m_set.add(0x2029); /* U+2029 is PS, the paragraph sep. */
     }
     lineBreakSet.m_next = pair.m_end; // accept '\n', U+2028, or U+2029

     /* Second alternative: '\r' optionally followed by '\n'. */
     CNfa carriageReturn = newCNfa(spec);
     pair.m_start.m_next2 = carriageReturn;
     carriageReturn.m_edge = '\r';

     CNfa afterReturn = newCNfa(spec);
     carriageReturn.m_next = afterReturn;
     afterReturn.m_next = pair.m_end; // accept '\r'

     CNfa lineFeed = newCNfa(spec);
     afterReturn.m_next2 = lineFeed;
     lineFeed.m_edge = '\n';
     lineFeed.m_next = pair.m_end; // accept '\r\n'

     return pair;
 }
Exemplo n.º 35
0
        /***************************************************************
          Function: thompson
          Description: High level access function to module.
          Deposits result in input CSpec: m_state_rules receives one
          empty Vector per entry of m_states, m_nfa_start receives the
          start state returned by machine(), and every state in
          m_nfa_states is labelled with its index.  Members are cleared
          before binding and again before returning.
          **************************************************************/
        public void thompson(CLexGen lexGen, CSpec spec, CInput input)
        {
            /* Clear any previous binding, then bind the arguments. */
            reset();
            Set(lexGen, spec, input);

            /* One rule vector per lexical state. */
            int stateCount = m_spec.m_states.Count;
            m_spec.m_state_rules = new Vector[stateCount];
            for (int s = 0; s < stateCount; s++)
            {
                m_spec.m_state_rules[s] = new Vector();
            }

            /* Create the NFA. */
            m_spec.m_nfa_start = machine();

            /* Set labels in created nfa machine. */
            int nfaCount = m_spec.m_nfa_states.size();
            for (int n = 0; n < nfaCount; n++)
            {
                CNfa state = (CNfa)m_spec.m_nfa_states.elementAt(n);
                state.m_label = n;
            }

            /* Debugging output. */
            if (CUtility.DO_DEBUG)
            {
                m_lexGen.print_nfa();
            }

            /* Note: the figure reported is the state count plus one. */
            if (m_spec.m_verbose)
            {
                System.Console.WriteLine("NFA comprised of "
                    + (m_spec.m_nfa_states.Count + 1)
                    + " states.");
            }

            reset();
        }
Exemplo n.º 36
0
        /***************************************************************
          Function: Set
          Description: Stores the given input, generator and spec in
          the CMakeNfa members m_input, m_lexGen and m_spec.  In debug
          builds each argument is asserted to be non-null first.
          **************************************************************/
        private void Set(CLexGen lexGen, CSpec spec, CInput input)
        {
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(input != null);
                CUtility.ASSERT(lexGen != null);
                CUtility.ASSERT(spec != null);
            }

            m_spec = spec;
            m_lexGen = lexGen;
            m_input = input;
        }