/***************************************************************
 * Function: factor
 * Description: Recursive descent regular expression parser.
 *   Parses one term into 'pair'. If the token that follows is
 *   CLOSURE, PLUS_CLOSE or OPTIONAL, the fragment is wrapped in
 *   a freshly allocated entry/exit node pair and that operator
 *   token is consumed.
 **************************************************************/
private void factor(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("factor", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    term(pair);

    // Classify the operator (if any) that follows the term.
    bool isClosure = (m_spec.m_current_token == CLexGen.CLOSURE);
    bool isPlusClose = (m_spec.m_current_token == CLexGen.PLUS_CLOSE);
    bool isOptional = (m_spec.m_current_token == CLexGen.OPTIONAL);

    if (isClosure || isPlusClose || isOptional)
    {
        CNfa entry = CAlloc.newCNfa(m_spec);
        CNfa exit = CAlloc.newCNfa(m_spec);

        // New entry leads into the old fragment; old end leads to new exit.
        entry.m_next = pair.m_start;
        pair.m_end.m_next = exit;

        // CLOSURE and OPTIONAL: the fragment may be bypassed entirely.
        if (isClosure || isOptional)
        {
            entry.m_next2 = exit;
        }

        // CLOSURE and PLUS_CLOSE: the fragment may be repeated.
        if (isClosure || isPlusClose)
        {
            pair.m_end.m_next2 = pair.m_start;
        }

        pair.m_start = entry;
        pair.m_end = exit;
        m_lexGen.advance();
    }

#if (DESCENT_DEBUG)
    CUtility.leave("factor", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: expr
 * Description: Recursive descent regular expression parser.
 *   Parses a concatenation into 'pair', then, for every OR
 *   token that follows, parses another concatenation and joins
 *   both alternatives under a new shared start node and a new
 *   shared end node.
 **************************************************************/
private void expr(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif

#if (DEBUG)
    CUtility.ASSERT(null != pair);
#endif

    // Scratch pair reused for each right-hand alternative.
    CNfaPair alternative = CAlloc.newCNfaPair();

    cat_expr(pair);

    while (m_spec.m_current_token == CLexGen.OR)
    {
        m_lexGen.advance();
        cat_expr(alternative);

        // New start node branching to both alternatives.
        CNfa fork = CAlloc.newCNfa(m_spec);
        fork.m_next2 = alternative.m_start;
        fork.m_next = pair.m_start;
        pair.m_start = fork;

        // New end node that both alternatives lead into.
        CNfa join = CAlloc.newCNfa(m_spec);
        pair.m_end.m_next = join;
        alternative.m_end.m_next = join;
        pair.m_end = join;
    }

#if (DESCENT_DEBUG)
    CUtility.leave("expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: cat_expr
 * Description: Recursive descent regular expression parser.
 *   Parses a run of factors for as long as first_in_cat()
 *   accepts the current token, chaining each new factor onto
 *   the end of the fragment held in 'pair'.
 **************************************************************/
private void cat_expr(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("cat_expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif

#if (DEBUG)
    CUtility.ASSERT(null != pair);
#endif

    // Scratch pair that receives each factor after the first.
    CNfaPair following = CAlloc.newCNfaPair();

    if (first_in_cat(m_spec.m_current_token))
    {
        factor(pair);
    }

    while (first_in_cat(m_spec.m_current_token))
    {
        factor(following);

        // The current end node takes over the role of the new factor's
        // start node, which is then discarded.
        CNfa absorbed = following.m_start;
        pair.m_end.mimic(absorbed);
        discardCNfa(absorbed);

        pair.m_end = following.m_end;
    }

#if (DESCENT_DEBUG)
    CUtility.leave("cat_expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: dodash
 * Description: Recursive descent regular expression parser.
 *   Fills 'Set' with the members of a character class, reading
 *   tokens until EOS or CCL_END. A DASH between two members
 *   adds every character from the first through the second.
 **************************************************************/
private void dodash(CSet Set)
{
    // Most recent single character seen; -1 means "none yet".
    int first = -1;

#if (DESCENT_DEBUG)
    CUtility.enter("dodash", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    while (m_spec.m_current_token != CLexGen.EOS
           && m_spec.m_current_token != CLexGen.CCL_END)
    {
        // DASH loses its special meaning if it is first in class.
        bool startsRange = (m_spec.m_current_token == CLexGen.DASH) && (first != -1);

        if (!startsRange)
        {
            // Ordinary member: remember it as a possible range start.
            first = m_spec.m_lexeme;
            if (m_spec.m_ignorecase)
            {
                Set.addncase(m_spec.m_lexeme);
            }
            else
            {
                Set.add(m_spec.m_lexeme);
            }
        }
        else
        {
            m_lexGen.advance();

            // DASH loses its special meaning if it is last in class.
            if (CLexGen.CCL_END == m_spec.m_current_token)
            {
                // 'first' already in Set.
                Set.add('-');
                break;
            }

            // Add every character from 'first' up to the current lexeme.
            while (first <= m_spec.m_lexeme)
            {
                if (m_spec.m_ignorecase)
                {
                    Set.addncase((char)first);
                }
                else
                {
                    Set.add(first);
                }
                ++first;
            }
        }

        m_lexGen.advance();
    }

#if (DESCENT_DEBUG)
    CUtility.leave("dodash", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: term
 * Description: Recursive descent regular expression parser.
 *   Parses either a parenthesised sub-expression or a single
 *   matching element (a literal character, the ANY token, or a
 *   character class) and stores the resulting two-node fragment
 *   in 'pair'.
 **************************************************************/
private void term(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("term", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    if (m_spec.m_current_token == CLexGen.OPEN_PAREN)
    {
        // Parenthesised sub-expression: the inner expr fills 'pair'.
        m_lexGen.advance();
        expr(pair);

        if (m_spec.m_current_token != CLexGen.CLOSE_PAREN)
        {
            CError.parse_error(CError.E_SYNTAX, m_input.m_line_number);
        }
        else
        {
            m_lexGen.advance();
        }
    }
    else
    {
        // Two-node fragment: head --edge--> tail.
        CNfa head = CAlloc.newCNfa(m_spec);
        pair.m_start = head;
        CNfa tail = CAlloc.newCNfa(m_spec);
        head.m_next = tail;
        pair.m_end = tail;

        bool isAlphaL = (m_spec.m_current_token == CLexGen.L)
                        && Char.IsLetter(m_spec.m_lexeme);
        bool foldLetter = m_spec.m_ignorecase && isAlphaL;
        bool needsSet = (m_spec.m_current_token == CLexGen.ANY)
                        || (m_spec.m_current_token == CLexGen.CCL_START)
                        || foldLetter;

        if (needsSet)
        {
            head.m_edge = CNfa.CCL;
            head.m_set = new CSet();

            if (foldLetter)
            {
                /* Match case-insensitive letters using character class. */
                head.m_set.addncase(m_spec.m_lexeme);
            }
            else if (m_spec.m_current_token == CLexGen.ANY)
            {
                /* Match dot (.) using character class. */
                head.m_set.add('\n');
                head.m_set.add('\r');
                // CSA: exclude BOL and EOF from character classes
                head.m_set.add(m_spec.BOL);
                head.m_set.add(m_spec.EOF);
                head.m_set.complement();
            }
            else
            {
                // Character class: step past the CCL_START token.
                m_lexGen.advance();

                if (m_spec.m_current_token == CLexGen.AT_BOL)
                {
                    // A leading AT_BOL token complements the set.
                    m_lexGen.advance();
                    // CSA: exclude BOL and EOF from character classes
                    head.m_set.add(m_spec.BOL);
                    head.m_set.add(m_spec.EOF);
                    head.m_set.complement();
                }

                if (m_spec.m_current_token != CLexGen.CCL_END)
                {
                    dodash(head.m_set);
                }
            }

            m_lexGen.advance();
        }
        else
        {
            // Plain literal: the edge is the character itself.
            head.m_edge = m_spec.m_lexeme;
            m_lexGen.advance();
        }
    }

#if (DESCENT_DEBUG)
    CUtility.leave("term", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: rule
 * Description: Recursive descent regular expression parser.
 *   Parses one rule: an optional leading AT_BOL anchor, an
 *   expression, and an optional trailing AT_EOL anchor. The
 *   final node of the resulting fragment receives the accept
 *   information from m_lexGen.packAccept() and the anchor flags.
 * Returns: the first node of the rule's NFA fragment.
 **************************************************************/
private CNfa rule()
{
    CNfa start = null;
    CNfa end = null;
    int anchor = CSpec.NONE;

#if (DESCENT_DEBUG)
    {
        CUtility.enter("rule", m_spec.m_lexeme, m_spec.m_current_token);
    }
#endif

    CNfaPair pair = CAlloc.newCNfaPair();

    if (CLexGen.AT_BOL == m_spec.m_current_token)
    {
        anchor = anchor | CSpec.START;
        m_lexGen.advance();
        expr(pair);

        // CSA: fixed beginning-of-line operator. 8-aug-1999
        start = CAlloc.newCNfa(m_spec);
        start.m_edge = m_spec.BOL;
        start.m_next = pair.m_start;
        end = pair.m_end;
    }
    else
    {
        expr(pair);
        start = pair.m_start;
        end = pair.m_end;
    }

    /* Check for null rules. Charles Fischer found this bug. [CSA]
     * This must happen before the end-of-line handling below, which
     * dereferences 'end'; otherwise an empty rule followed by AT_EOL
     * fails with a null dereference instead of reporting E_ZERO.
     * NOTE(review): assumes CError.parse_error does not return
     * normally; its definition is not visible here. */
    if (end == null)
    {
        CError.parse_error(CError.E_ZERO, m_input.m_line_number);
    }

    if (CLexGen.AT_EOL == m_spec.m_current_token)
    {
        m_lexGen.advance();

        // CSA: fixed end-of-line operator. 8-aug-1999
        // The old end branches either into the newline pair or to a
        // node with an EOF edge; both paths finish at nlpair.m_end.
        CNfaPair nlpair = CAlloc.newNLPair(m_spec);
        end.m_next = CAlloc.newCNfa(m_spec);
        end.m_next.m_next = nlpair.m_start;
        end.m_next.m_next2 = CAlloc.newCNfa(m_spec);
        end.m_next.m_next2.m_edge = m_spec.EOF;
        end.m_next.m_next2.m_next = nlpair.m_end;
        end = nlpair.m_end;
        anchor = anchor | CSpec.END;
    }

    /* Handle end of regular expression.  See page 103. */
    end.m_accept = m_lexGen.packAccept();
    end.m_anchor = anchor;

    /* Begin: Removed for states. */
    /*m_lexGen.advance();*/
    /* End: Removed for states. */

#if (DESCENT_DEBUG)
    {
        CUtility.leave("rule", m_spec.m_lexeme, m_spec.m_current_token);
    }
#endif

    return (start);
}
/***************************************************************
 * Function: machine
 * Description: Recursive descent regular expression parser.
 *   Parses rules until END_OF_INPUT, hanging each rule's
 *   fragment off a chain of nodes linked through m_next2, and
 *   registers each rule with the states returned by
 *   m_lexGen.getStates(). A final pseudo-rule matching BOL and
 *   EOF is appended and registered for every state.
 * Returns: the first node of the chain.
 **************************************************************/
private CNfa machine()
{
#if (DESCENT_DEBUG)
    CUtility.enter("machine", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    CNfa start = CAlloc.newCNfa(m_spec);
    CNfa tail = start;

    SparseBitSet states = m_lexGen.getStates();

    /* Begin: Added for states. */
    m_spec.m_current_token = CLexGen.EOS;
    m_lexGen.advance();
    /* End: Added for states. */

    if (m_spec.m_current_token != CLexGen.END_OF_INPUT) // CSA fix.
    {
        CNfa firstRule = rule();
        tail.m_next = firstRule;
        processStates(states, firstRule);
    }

    while (m_spec.m_current_token != CLexGen.END_OF_INPUT)
    {
        /* Make state changes HERE. */
        states = m_lexGen.getStates();

        /* Begin: Added for states. */
        m_lexGen.advance();
        if (m_spec.m_current_token == CLexGen.END_OF_INPUT)
        {
            break;
        }
        /* End: Added for states. */

        // Extend the chain by one node and attach the next rule to it.
        CNfa link = CAlloc.newCNfa(m_spec);
        tail.m_next2 = link;
        tail = link;

        CNfa nextRule = rule();
        tail.m_next = nextRule;
        processStates(states, nextRule);
    }

    // CSA: add pseudo-rules for BOL and EOF
    SparseBitSet all_states = new SparseBitSet();
    int stateIndex = 0;
    while (stateIndex < m_spec.m_states.Count)
    {
        all_states.Set(stateIndex);
        ++stateIndex;
    }

    CNfa pseudoLink = CAlloc.newCNfa(m_spec);
    tail.m_next2 = pseudoLink;

    CNfa matcher = CAlloc.newCNfa(m_spec);
    pseudoLink.m_next = matcher;
    matcher.m_edge = CNfa.CCL;

    CNfa acceptor = CAlloc.newCNfa(m_spec);
    matcher.m_next = acceptor;

    matcher.m_set = new CSet();
    matcher.m_set.add(m_spec.BOL);
    matcher.m_set.add(m_spec.EOF);

    // do-nothing accept rule
    acceptor.m_accept = new CAccept(new char[0], 0, m_input.m_line_number + 1);

    processStates(all_states, matcher);
    // CSA: done.

#if (DESCENT_DEBUG)
    CUtility.leave("machine", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    return (start);
}