Пример #1
0
        // Size of the character set after it has been reduced to character
        // classes; simplify() copies this value into CSpec.m_dtrans_ncols.
        private int mapped_charset_size;   // reduced charset size

        /***************************************************************
         *    Function: simplify
         *    Description: Computes the character-class mapping for
         *    m_spec, relabels every NFA edge through that mapping, and
         *    then stores the mapping and the reduced column count back
         *    on m_spec.
         **************************************************************/
        public void simplify(CSpec m_spec)
        {
            // Sets up the class table (ccls) and the reduced charset size.
            computeClasses(m_spec);

            // Walk all NFA states and relabel the ones that carry a real edge.
            for (IEnumerator states = m_spec.m_nfa_states.elements(); states.MoveNext();)
            {
                CNfa state = (CNfa)states.Current;

                if (state.m_edge != CNfa.EMPTY && state.m_edge != CNfa.EPSILON)
                {
                    if (state.m_edge != CNfa.CCL)
                    {
                        // Single-character edge: replace it with its class index.
                        state.m_edge = ccls[state.m_edge];
                    }
                    else
                    {
                        // Character-set edge: build the mapped set, then swap it in.
                        CSet mapped = new CSet();
                        mapped.map(state.m_set, ccls);
                        state.m_set = mapped;
                    }
                }
                // EMPTY and EPSILON edges are left exactly as they are.
            }

            // Hand the mapping and the new column count back to the spec.
            m_spec.m_ccls_map     = ccls;
            m_spec.m_dtrans_ncols = mapped_charset_size;
        }
Пример #2
0
        /********************************************************
         *    Function: mimic
         *    Description: Overwrites this NFA state's edge,
         *    character set, successor links, accept value, anchor
         *    and state set with those of nfa.  The character set
         *    is copied through CSet.mimic and the state set
         *    through Clone; the remaining fields are assigned
         *    directly.
         *******************************************************/
        public void mimic(CNfa nfa)
        {
            m_edge = nfa.m_edge;

            if (nfa.m_set == null)
            {
                m_set = null;
            }
            else
            {
                // Reuse the existing set object when there is one.
                if (m_set == null)
                {
                    m_set = new CSet();
                }
                m_set.mimic(nfa.m_set);
            }

            m_next   = nfa.m_next;
            m_next2  = nfa.m_next2;
            m_accept = nfa.m_accept;
            m_anchor = nfa.m_anchor;

            // The state set is cloned rather than shared with the source.
            m_states = (nfa.m_states == null)
                ? null
                : (SparseBitSet)nfa.m_states.Clone();
        }
Пример #3
0
 /***************************************************************
  *    Function: CNfaPair
  *    Description: Constructor.  Creates a pair whose start and
  *    end states are both unset (null).
  **************************************************************/
 public CNfaPair()
 {
     this.m_start = null;
     this.m_end   = null;
 }
Пример #4
0
 /***************************************************************
  *    Function: discardCNfa
  *    Description: Removes nfa from the spec's collection of NFA
  *    states (m_spec.m_nfa_states).
  **************************************************************/
 private void discardCNfa(CNfa nfa)
 {
     m_spec.m_nfa_states.removeElement(nfa);
 }
Пример #5
0
 /********************************************************
  *    Function: CNfa
  *    Description: Constructor.  Creates a state with an
  *    EMPTY edge, no label, no anchor, and every reference
  *    field set to null.
  *******************************************************/
 public CNfa()
 {
     /* Edge, label and anchor start at their "nothing" values. */
     m_edge   = EMPTY;
     m_label  = NO_LABEL;
     m_anchor = CSpec.NONE;

     /* No successors yet. */
     m_next  = null;
     m_next2 = null;

     /* No character set, accept value or state set yet. */
     m_set    = null;
     m_accept = null;
     m_states = null;
 }
Пример #6
0
        /***************************************************************
         *    Function: factor
         *    Description: Recursive descent regular expression parser.
         *    Parses a term into pair and, when the current token is a
         *    CLOSURE, PLUS_CLOSE or OPTIONAL operator, wraps the term
         *    in a fresh start/end state pair wired up for that
         *    operator, then advances past the operator.
         **************************************************************/
        private void factor(CNfaPair pair)
        {
      #if (DESCENT_DEBUG)
            {
                CUtility.enter("factor", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            term(pair);

            // Classify the token that follows the term.
            bool isClosure   = (CLexGen.CLOSURE == m_spec.m_current_token);
            bool isPlusClose = (CLexGen.PLUS_CLOSE == m_spec.m_current_token);
            bool isOptional  = (CLexGen.OPTIONAL == m_spec.m_current_token);

            if (isClosure || isPlusClose || isOptional)
            {
                CNfa wrapStart = CAlloc.newCNfa(m_spec);
                CNfa wrapEnd   = CAlloc.newCNfa(m_spec);

                // Enter the term from the new start; leave it into the new end.
                wrapStart.m_next  = pair.m_start;
                pair.m_end.m_next = wrapEnd;

                // Closure and optional: the term may be skipped altogether.
                if (isClosure || isOptional)
                {
                    wrapStart.m_next2 = wrapEnd;
                }

                // Closure and plus-close: the term may repeat.
                if (isClosure || isPlusClose)
                {
                    pair.m_end.m_next2 = pair.m_start;
                }

                pair.m_start = wrapStart;
                pair.m_end   = wrapEnd;
                m_lexGen.advance();
            }

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("factor", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif
        }
Пример #7
0
        /***************************************************************
         *    Function: newCNfa
         *    Description: Creates a new NFA state, appends it to
         *    spec.m_nfa_states, gives it an EPSILON edge and returns it.
         **************************************************************/
        public static CNfa newCNfa(CSpec spec)
        {
            /* UNDONE: Buffer this? */
            CNfa created = new CNfa();

            /* Label assignment is disabled here:
             * created.m_label = spec.m_nfa_states.size(); */
            spec.m_nfa_states.addElement(created);
            created.m_edge = CNfa.EPSILON;

            return created;
        }
Пример #8
0
        /***************************************************************
         *    Function: processStates
         *    Description: For every index below m_spec.m_states.Count
         *    whose bit is set in states, appends current to the
         *    matching entry of m_spec.m_state_rules.
         **************************************************************/
        private void processStates(SparseBitSet states, CNfa current)
        {
            // The count is read once, before the loop starts.
            int stateCount = m_spec.m_states.Count;

            for (int index = 0; index < stateCount; ++index)
            {
                if (!states.Get(index))
                {
                    continue; // bit not set for this index
                }

                m_spec.m_state_rules[index].addElement(current);
            }
        }
Пример #9
0
        /***************************************************************
         *    Function: CSpec
         *    Description: Constructor.  Stores the owning generator
         *    and resets every field to its initial value; the lexer
         *    state table starts with the single entry "YYINITIAL".
         **************************************************************/
        public CSpec(CLexGen lexGen)
        {
            m_lexGen = lexGen;

            /* Regular expression token variables. */
            m_current_token = CLexGen.EOS;
            m_lexeme        = '\0';
            m_in_quote      = false;
            m_in_ccl        = false;

            /* Hashtable for lexer states; "YYINITIAL" gets the first index. */
            m_states = new Hashtable();
            m_states.Add("YYINITIAL", m_states.Count);

            /* Hashtable for lexical macros. */
            m_macros = new Hashtable();

            /* Lexer options that start switched on. */
            m_count_chars = true;
            m_unix        = true;

            /* Lexer options that start switched off. */
            m_integer_type   = false;
            m_intwrap_type   = false;
            m_count_lines    = false;
            m_cup_compatible = false;
            m_public         = false;
            m_yyeof          = false;
            m_ignorecase     = false;

            /* JLex runtime options. */
            m_verbose = false;

            /* NFA and DFA containers. */
            m_nfa_start  = null;
            m_nfa_states = new Vector();
            m_dfa_states = new Vector();
            m_dfa_sets   = new Hashtable();

            /* Transition table bookkeeping. */
            m_dtrans_vector = new Vector();
            m_dtrans_ncols  = CUtility.MAX_SEVEN_BIT + 1;
            m_row_map       = null;
            m_col_map       = null;
            m_state_dtrans  = null;
            m_state_rules   = null;

            /* Accept and anchor tables. */
            m_accept_vector = null;
            m_anchor_array  = null;

            /* User code sections: nothing stored yet ... */
            m_init_code        = null;
            m_init_throw_code  = null;
            m_yylex_throw_code = null;
            m_class_code       = null;
            m_eof_code         = null;
            m_eof_value_code   = null;
            m_eof_throw_code   = null;

            /* ... and their read counters all start at zero. */
            m_init_read        = 0;
            m_init_throw_read  = 0;
            m_yylex_throw_read = 0;
            m_class_read       = 0;
            m_eof_read         = 0;
            m_eof_value_read   = 0;
            m_eof_throw_read   = 0;
        }
Пример #10
0
        /** Compute the minimum set of character classes needed to
         *  disambiguate edges.  Every character starts out in class 0; for
         *  each labelled NFA edge, any class that holds both characters on
         *  the edge and characters off it is split, with the on-edge
         *  characters moving to a newly numbered class.  The resulting table
         *  is left in ccls and the class count in mapped_charset_size.
         *  [CSA, 25-Jul-1999] */
        private void computeClasses(CSpec m_spec)
        {
            this.original_charset_size = m_spec.m_dtrans_ncols;
            this.ccls = new int[original_charset_size]; // all entries start at class 0

            int          classCount = 1;
            SparseBitSet onEdge     = new SparseBitSet();
            SparseBitSet offEdge    = new SparseBitSet();
            Hashtable    renamed    = new Hashtable();

            if (m_spec.m_verbose)
            {
                System.Console.Write("Working on character classes.");
            }

            for (IEnumerator states = m_spec.m_nfa_states.elements(); states.MoveNext();)
            {
                CNfa state = (CNfa)states.Current;
                if (state.m_edge == CNfa.EMPTY || state.m_edge == CNfa.EPSILON)
                {
                    continue; // this edge tells characters apart in no way
                }

                // Record which classes appear on the edge and which appear off it.
                onEdge.clearAll();
                offEdge.clearAll();
                for (int ch = 0; ch < ccls.Length; ch++)
                {
                    bool edgeHasChar =
                        state.m_edge == ch ||                                   // single-character label
                        (state.m_edge == CNfa.CCL && state.m_set.contains(ch)); // character-set label

                    (edgeHasChar ? onEdge : offEdge).Set(ccls[ch]);
                }

                // Classes present on both sides are the ones that must be split.
                onEdge.and(offEdge);
                bool nothingToSplit = (onEdge.size() == 0);
                if (m_spec.m_verbose)
                {
                    System.Console.Write(nothingToSplit ? "." : ":");
                }
                if (nothingToSplit)
                {
                    continue;
                }

                // Split them: renamed maps an old class number to its new one.
                renamed.Clear();
                for (int ch = 0; ch < ccls.Length; ch++)
                {
                    if (!onEdge.Get(ccls[ch]))
                    {
                        continue; // this character's class is not being split
                    }
                    if (!(state.m_edge == ch ||
                          (state.m_edge == CNfa.CCL && state.m_set.contains(ch))))
                    {
                        continue; // off-edge characters keep the old class number
                    }

                    int oldClass = ccls[ch];
                    if (!renamed.ContainsKey(oldClass))
                    {
                        renamed.Add(oldClass, (classCount++)); // allocate a new class
                    }
                    ccls[ch] = (int)renamed[oldClass];
                }
            }

            if (m_spec.m_verbose)
            {
                System.Console.WriteLine();
                System.Console.WriteLine("NFA has " + classCount + " distinct character classes.");
            }

            this.mapped_charset_size = classCount;
        }
Пример #11
0
        /***************************************************************
         *    Function: rule
         *    Description: Recursive descent regular expression parser.
         *    Parses one rule, including an optional leading AT_BOL and
         *    trailing AT_EOL token, stores the accept value and anchor
         *    flags on the rule's final state, and returns the rule's
         *    first state.
         **************************************************************/
        private CNfa rule()
        {
            CNfa start;
            CNfa end;
            int  anchor = CSpec.NONE;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("rule", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            CNfaPair pair = CAlloc.newCNfaPair();

            if (CLexGen.AT_BOL != m_spec.m_current_token)
            {
                // Unanchored start: the expression's own pair is the rule.
                expr(pair);
                start = pair.m_start;
                end   = pair.m_end;
            }
            else
            {
                anchor |= CSpec.START;
                m_lexGen.advance();
                expr(pair);

                // CSA: fixed beginning-of-line operator. 8-aug-1999
                // A new state with a BOL edge is placed in front of the expression.
                start        = CAlloc.newCNfa(m_spec);
                start.m_edge = m_spec.BOL;
                start.m_next = pair.m_start;
                end          = pair.m_end;
            }

            if (CLexGen.AT_EOL == m_spec.m_current_token)
            {
                m_lexGen.advance();

                // CSA: fixed end-of-line operator. 8-aug-1999
                // After the expression, branch to either the pair returned by
                // newNLPair or a single EOF edge; both paths end at nlpair.m_end.
                CNfaPair nlpair = CAlloc.newNLPair(m_spec);

                CNfa branch = CAlloc.newCNfa(m_spec);
                end.m_next    = branch;
                branch.m_next = nlpair.m_start;

                CNfa eofState = CAlloc.newCNfa(m_spec);
                branch.m_next2  = eofState;
                eofState.m_edge = m_spec.EOF;
                eofState.m_next = nlpair.m_end;

                end     = nlpair.m_end;
                anchor |= CSpec.END;
            }

            /* Check for null rules. Charles Fischer found this bug. [CSA] */
            /* NOTE(review): the end-of-line branch above already dereferences
             * end before this check runs - confirm whether a null end can
             * reach that branch. */
            if (end == null)
            {
                CError.parse_error(CError.E_ZERO, m_input.m_line_number);
            }

            /* Handle end of regular expression.  See page 103. */
            end.m_accept = m_lexGen.packAccept();
            end.m_anchor = anchor;

            /* Begin: Removed for states. */
            /*m_lexGen.advance();*/
            /* End: Removed for states. */

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("rule", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            return start;
        }