Example #1
0
        /***************************************************************
         *    Function: factor
         *    Description: Recursive descent regular expression parser.
         **************************************************************/
        private void factor
        (
            CNfaPair pair
        )
        {
            CNfa start = null;
            CNfa end   = null;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("factor", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            term(pair);

            if (CLexGen.CLOSURE == m_spec.m_current_token ||
                CLexGen.PLUS_CLOSE == m_spec.m_current_token ||
                CLexGen.OPTIONAL == m_spec.m_current_token)
            {
                start = CAlloc.newCNfa(m_spec);
                end   = CAlloc.newCNfa(m_spec);

                start.m_next      = pair.m_start;
                pair.m_end.m_next = end;

                if (CLexGen.CLOSURE == m_spec.m_current_token ||
                    CLexGen.OPTIONAL == m_spec.m_current_token)
                {
                    start.m_next2 = end;
                }

                if (CLexGen.CLOSURE == m_spec.m_current_token ||
                    CLexGen.PLUS_CLOSE == m_spec.m_current_token)
                {
                    pair.m_end.m_next2 = pair.m_start;
                }

                pair.m_start = start;
                pair.m_end   = end;
                m_lexGen.advance();
            }

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("factor", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif
        }
Example #2
0
        /***************************************************************
         *    Function: expr
         *    Description: Recursive descent regular expression parser.
         **************************************************************/
        private void expr
        (
            CNfaPair pair
        )
        {
            CNfaPair e2_pair;
            CNfa     p;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("expr", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

      #if (DEBUG)
            {
                CUtility.ASSERT(null != pair);
            }
      #endif

            e2_pair = CAlloc.newCNfaPair();

            cat_expr(pair);

            while (CLexGen.OR == m_spec.m_current_token)
            {
                m_lexGen.advance();
                cat_expr(e2_pair);

                p            = CAlloc.newCNfa(m_spec);
                p.m_next2    = e2_pair.m_start;
                p.m_next     = pair.m_start;
                pair.m_start = p;

                p = CAlloc.newCNfa(m_spec);
                pair.m_end.m_next    = p;
                e2_pair.m_end.m_next = p;
                pair.m_end           = p;
            }

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("expr", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif
        }
Example #3
0
        /***************************************************************
         *    Function: cat_expr
         *    Description: Recursive descent regular expression parser.
         **************************************************************/
        private void cat_expr
        (
            CNfaPair pair
        )
        {
            CNfaPair e2_pair;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("cat_expr", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

      #if (DEBUG)
            {
                CUtility.ASSERT(null != pair);
            }
      #endif

            e2_pair = CAlloc.newCNfaPair();

            if (first_in_cat(m_spec.m_current_token))
            {
                factor(pair);
            }

            while (first_in_cat(m_spec.m_current_token))
            {
                factor(e2_pair);

                /* Destroy */
                pair.m_end.mimic(e2_pair.m_start);
                discardCNfa(e2_pair.m_start);

                pair.m_end = e2_pair.m_end;
            }

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("cat_expr", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif
        }
Example #4
0
        /***************************************************************
         *    Function: dodash
         *    Description: Recursive descent regular expression parser.
         **************************************************************/
        private void dodash
        (
            CSet Set
        )
        {
            int first = -1;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("dodash", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            while (CLexGen.EOS != m_spec.m_current_token &&
                   CLexGen.CCL_END != m_spec.m_current_token)
            {
                // DASH loses its special meaning if it is first in class.
                if (CLexGen.DASH == m_spec.m_current_token && -1 != first)
                {
                    m_lexGen.advance();
                    // DASH loses its special meaning if it is last in class.
                    if (m_spec.m_current_token == CLexGen.CCL_END)
                    {
                        // 'first' already in Set.
                        Set.add('-');
                        break;
                    }
                    for ( ; first <= m_spec.m_lexeme; ++first)
                    {
                        if (m_spec.m_ignorecase)
                        {
                            Set.addncase((char)first);
                        }
                        else
                        {
                            Set.add(first);
                        }
                    }
                }
                else
                {
                    first = m_spec.m_lexeme;
                    if (m_spec.m_ignorecase)
                    {
                        Set.addncase(m_spec.m_lexeme);
                    }
                    else
                    {
                        Set.add(m_spec.m_lexeme);
                    }
                }

                m_lexGen.advance();
            }

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("dodash", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif
        }
Example #5
0
        /***************************************************************
         *    Function: term
         *    Description: Recursive descent regular expression parser.
         **************************************************************/
        private void term
        (
            CNfaPair pair
        )
        {
            CNfa start;
            bool isAlphaL;

            // int c;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("term", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            if (CLexGen.OPEN_PAREN == m_spec.m_current_token)
            {
                m_lexGen.advance();
                expr(pair);

                if (CLexGen.CLOSE_PAREN == m_spec.m_current_token)
                {
                    m_lexGen.advance();
                }
                else
                {
                    CError.parse_error(CError.E_SYNTAX, m_input.m_line_number);
                }
            }
            else
            {
                start        = CAlloc.newCNfa(m_spec);
                pair.m_start = start;

                start.m_next = CAlloc.newCNfa(m_spec);
                pair.m_end   = start.m_next;

                if (CLexGen.L == m_spec.m_current_token &&
                    Char.IsLetter(m_spec.m_lexeme))
                {
                    isAlphaL = true;
                }
                else
                {
                    isAlphaL = false;
                }
                if (false == (CLexGen.ANY == m_spec.m_current_token ||
                              CLexGen.CCL_START == m_spec.m_current_token ||
                              (m_spec.m_ignorecase && isAlphaL)))
                {
                    start.m_edge = m_spec.m_lexeme;
                    m_lexGen.advance();
                }
                else
                {
                    start.m_edge = CNfa.CCL;

                    start.m_set = new CSet();

                    /* Match case-insensitive letters using character class. */
                    if (m_spec.m_ignorecase && isAlphaL)
                    {
                        start.m_set.addncase(m_spec.m_lexeme);
                    }
                    /* Match dot (.) using character class. */
                    else if (CLexGen.ANY == m_spec.m_current_token)
                    {
                        start.m_set.add('\n');
                        start.m_set.add('\r');
                        // CSA: exclude BOL and EOF from character classes
                        start.m_set.add(m_spec.BOL);
                        start.m_set.add(m_spec.EOF);
                        start.m_set.complement();
                    }
                    else
                    {
                        m_lexGen.advance();
                        if (CLexGen.AT_BOL == m_spec.m_current_token)
                        {
                            m_lexGen.advance();

                            // CSA: exclude BOL and EOF from character classes
                            start.m_set.add(m_spec.BOL);
                            start.m_set.add(m_spec.EOF);
                            start.m_set.complement();
                        }
                        if (false == (CLexGen.CCL_END == m_spec.m_current_token))
                        {
                            dodash(start.m_set);
                        }

                        /*else
                         *            {
                         *          for (c = 0; c <= ' '; ++c)
                         *            {
                         *              start.m_set.add((byte) c);
                         *            }
                         *            }*/
                    }
                    m_lexGen.advance();
                }
            }

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("term", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif
        }
Example #6
0
        /***************************************************************
         *    Function: rule
         *    Description: Recursive descent regular expression parser.
         **************************************************************/
        private CNfa rule
        (
        )
        {
            CNfaPair pair;
            //CNfa p;
            CNfa start  = null;
            CNfa end    = null;
            int  anchor = CSpec.NONE;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("rule", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            pair = CAlloc.newCNfaPair();

            if (CLexGen.AT_BOL == m_spec.m_current_token)
            {
                anchor = anchor | CSpec.START;
                m_lexGen.advance();
                expr(pair);

                // CSA: fixed beginning-of-line operator. 8-aug-1999
                start        = CAlloc.newCNfa(m_spec);
                start.m_edge = m_spec.BOL;
                start.m_next = pair.m_start;
                end          = pair.m_end;
            }
            else
            {
                expr(pair);
                start = pair.m_start;
                end   = pair.m_end;
            }

            if (CLexGen.AT_EOL == m_spec.m_current_token)
            {
                m_lexGen.advance();
                // CSA: fixed end-of-line operator. 8-aug-1999
                CNfaPair nlpair = CAlloc.newNLPair(m_spec);
                end.m_next                = CAlloc.newCNfa(m_spec);
                end.m_next.m_next         = nlpair.m_start;
                end.m_next.m_next2        = CAlloc.newCNfa(m_spec);
                end.m_next.m_next2.m_edge = m_spec.EOF;
                end.m_next.m_next2.m_next = nlpair.m_end;
                end    = nlpair.m_end;
                anchor = anchor | CSpec.END;
            }

            /* Check for null rules. Charles Fischer found this bug. [CSA] */
            if (end == null)
            {
                CError.parse_error(CError.E_ZERO, m_input.m_line_number);
            }

            /* Handle end of regular expression.  See page 103. */
            end.m_accept = m_lexGen.packAccept();
            end.m_anchor = anchor;

            /* Begin: Removed for states. */
            /*m_lexGen.advance();*/
            /* End: Removed for states. */

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("rule", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            return(start);
        }
Example #7
0
        /***************************************************************
         *    Function: machine
         *    Description: Recursive descent regular expression parser.
         **************************************************************/
        private CNfa machine
        (
        )
        {
            CNfa         start;
            CNfa         p;
            SparseBitSet states;

      #if (DESCENT_DEBUG)
            {
                CUtility.enter("machine", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            start = CAlloc.newCNfa(m_spec);
            p     = start;

            states = m_lexGen.getStates();

            /* Begin: Added for states. */
            m_spec.m_current_token = CLexGen.EOS;
            m_lexGen.advance();
            /* End: Added for states. */

            if (CLexGen.END_OF_INPUT != m_spec.m_current_token) // CSA fix.
            {
                p.m_next = rule();

                processStates(states, p.m_next);
            }

            while (CLexGen.END_OF_INPUT != m_spec.m_current_token)
            {
                /* Make state changes HERE. */
                states = m_lexGen.getStates();

                /* Begin: Added for states. */
                m_lexGen.advance();
                if (CLexGen.END_OF_INPUT == m_spec.m_current_token)
                {
                    break;
                }
                /* End: Added for states. */

                p.m_next2 = CAlloc.newCNfa(m_spec);
                p         = p.m_next2;
                p.m_next  = rule();

                processStates(states, p.m_next);
            }

            // CSA: add pseudo-rules for BOL and EOF
            SparseBitSet all_states = new SparseBitSet();
            for (int i = 0; i < m_spec.m_states.Count; ++i)
            {
                all_states.Set(i);
            }
            p.m_next2       = CAlloc.newCNfa(m_spec);
            p               = p.m_next2;
            p.m_next        = CAlloc.newCNfa(m_spec);
            p.m_next.m_edge = CNfa.CCL;
            p.m_next.m_next = CAlloc.newCNfa(m_spec);
            p.m_next.m_set  = new CSet();
            p.m_next.m_set.add(m_spec.BOL);
            p.m_next.m_set.add(m_spec.EOF);
            p.m_next.m_next.m_accept = // do-nothing accept rule
                                       new CAccept(new char[0], 0, m_input.m_line_number + 1);
            processStates(all_states, p.m_next);
            // CSA: done.

      #if (DESCENT_DEBUG)
            {
                CUtility.leave("machine", m_spec.m_lexeme, m_spec.m_current_token);
            }
      #endif

            return(start);
        }