/***************************************************************
 * Function: newNLPair
 * Description: return a new CNfaPair that matches a new
 * line: (\r\n?|[\n\u2028\u2029])
 * Added by CSA 8-Aug-1999, updated 10-Aug-1999
 *
 * The two Unicode separators are only added to the character
 * class when spec.m_dtrans_ncols - CSpec.NUM_PSEUDO is greater
 * than 0x2029.  The code points are written in hexadecimal:
 * the decimal values 2028/2029 are different characters
 * (U+07EC/U+07ED), not the line/paragraph separators.
 **************************************************************/
public static CNfaPair newNLPair(CSpec spec)
{
    CNfaPair pair = newCNfaPair();
    pair.m_end = newCNfa(spec);   // newline accepting state
    pair.m_start = newCNfa(spec); // start state, branches via m_next and m_next2

    // Branch 1: one character taken from the class [\n\u2028\u2029].
    CNfa lineChars = newCNfa(spec);
    pair.m_start.m_next = lineChars;
    lineChars.m_edge = CNfa.CCL;
    lineChars.m_set = new CSet();
    lineChars.m_set.add('\n');
    if (spec.m_dtrans_ncols - CSpec.NUM_PSEUDO > 0x2029)
    {
        lineChars.m_set.add(0x2028); /* U+2028 is LS, the line separator */
        lineChars.m_set.add(0x2029); /* U+2029 is PS, the paragraph sep. */
    }
    lineChars.m_next = pair.m_end; // accept '\n', U+2028, or U+2029

    // Branch 2: '\r' optionally followed by '\n'.
    CNfa carriageReturn = newCNfa(spec);
    pair.m_start.m_next2 = carriageReturn;
    carriageReturn.m_edge = '\r';

    CNfa afterCarriageReturn = newCNfa(spec);
    carriageReturn.m_next = afterCarriageReturn;
    afterCarriageReturn.m_next = pair.m_end; // accept '\r'

    CNfa lineFeed = newCNfa(spec);
    afterCarriageReturn.m_next2 = lineFeed;
    lineFeed.m_edge = '\n';
    lineFeed.m_next = pair.m_end; // accept '\r\n'

    return pair;
}
/***************************************************************
 * Function: newCNfaPair
 * Description: Allocate and return a fresh, default-constructed
 * CNfaPair.
 **************************************************************/
public static CNfaPair newCNfaPair()
{
    return new CNfaPair();
}
/***************************************************************
 * Function: factor
 * Description: Recursive descent regular expression parser.
 * Parses one term and, when the current token is a closure
 * (*), plus-closure (+) or optional (?) operator, wraps the
 * term's NFA fragment in a new start/end state pair and
 * consumes the operator token.
 **************************************************************/
private void factor(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("factor", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    term(pair);

    bool isClosure = (CLexGen.CLOSURE == m_spec.m_current_token);
    bool isPlusClose = (CLexGen.PLUS_CLOSE == m_spec.m_current_token);
    bool isOptional = (CLexGen.OPTIONAL == m_spec.m_current_token);

    if (isClosure || isPlusClose || isOptional)
    {
        CNfa head = CAlloc.newCNfa(m_spec);
        CNfa tail = CAlloc.newCNfa(m_spec);

        head.m_next = pair.m_start;
        pair.m_end.m_next = tail;

        // '*' and '?' may match nothing: bypass edge from head to tail.
        if (isClosure || isOptional)
        {
            head.m_next2 = tail;
        }

        // '*' and '+' may repeat: back edge from the old end to the old start.
        if (isClosure || isPlusClose)
        {
            pair.m_end.m_next2 = pair.m_start;
        }

        pair.m_start = head;
        pair.m_end = tail;
        m_lexGen.advance();
    }

#if (DESCENT_DEBUG)
    CUtility.leave("factor", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: expr
 * Description: Recursive descent regular expression parser.
 * Parses a sequence of cat_expr alternatives separated by OR
 * tokens.  Each further alternative is joined to the fragment
 * built so far through a new fork state in front and a new
 * join state behind; pair is updated to the combined fragment.
 **************************************************************/
private void expr(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif

#if (DEBUG)
    CUtility.ASSERT(null != pair);
#endif

    // Scratch pair reused for every right-hand alternative.
    CNfaPair alternative = CAlloc.newCNfaPair();

    cat_expr(pair);

    while (CLexGen.OR == m_spec.m_current_token)
    {
        m_lexGen.advance();
        cat_expr(alternative);

        // New start state branching to both alternatives.
        CNfa fork = CAlloc.newCNfa(m_spec);
        fork.m_next2 = alternative.m_start;
        fork.m_next = pair.m_start;
        pair.m_start = fork;

        // New end state that both alternatives lead into.
        CNfa join = CAlloc.newCNfa(m_spec);
        pair.m_end.m_next = join;
        alternative.m_end.m_next = join;
        pair.m_end = join;
    }

#if (DESCENT_DEBUG)
    CUtility.leave("expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: cat_expr
 * Description: Recursive descent regular expression parser.
 * Parses a run of factors and concatenates them: the end state
 * of the fragment built so far takes over (mimic) the start
 * state of the next factor, which is then discarded.
 **************************************************************/
private void cat_expr(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("cat_expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif

#if (DEBUG)
    CUtility.ASSERT(null != pair);
#endif

    // Scratch pair reused for every factor after the first.
    CNfaPair following = CAlloc.newCNfaPair();

    // The first factor, if any, is built directly into pair.
    if (first_in_cat(m_spec.m_current_token))
    {
        factor(pair);
    }

    // Splice each further factor onto the end of pair.
    while (first_in_cat(m_spec.m_current_token))
    {
        factor(following);

        /* Destroy */
        pair.m_end.mimic(following.m_start);
        discardCNfa(following.m_start);

        pair.m_end = following.m_end;
    }

#if (DESCENT_DEBUG)
    CUtility.leave("cat_expr", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: term
 * Description: Recursive descent regular expression parser.
 * Parses either a parenthesised expression or a single
 * two-state fragment whose edge is a literal character, a
 * case-insensitive letter, the dot (.) class, or a bracketed
 * character class.
 **************************************************************/
private void term(CNfaPair pair)
{
#if (DESCENT_DEBUG)
    CUtility.enter("term", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    if (CLexGen.OPEN_PAREN == m_spec.m_current_token)
    {
        // Parenthesised sub-expression.
        m_lexGen.advance();
        expr(pair);

        if (CLexGen.CLOSE_PAREN != m_spec.m_current_token)
        {
            CError.parse_error(CError.E_SYNTAX, m_input.m_line_number);
        }
        else
        {
            m_lexGen.advance();
        }
    }
    else
    {
        // Two fresh states: head --edge--> tail.
        CNfa head = CAlloc.newCNfa(m_spec);
        pair.m_start = head;
        head.m_next = CAlloc.newCNfa(m_spec);
        pair.m_end = head.m_next;

        bool isAlphaL = (CLexGen.L == m_spec.m_current_token)
            && Char.IsLetter(m_spec.m_lexeme);
        bool foldCase = m_spec.m_ignorecase && isAlphaL;
        bool isDot = (CLexGen.ANY == m_spec.m_current_token);
        bool isClassStart = (CLexGen.CCL_START == m_spec.m_current_token);

        if (isDot || isClassStart || foldCase)
        {
            head.m_edge = CNfa.CCL;
            head.m_set = new CSet();

            if (foldCase)
            {
                /* Match case-insensitive letters using character class. */
                head.m_set.addncase(m_spec.m_lexeme);
            }
            else if (isDot)
            {
                /* Match dot (.) using character class. */
                head.m_set.add('\n');
                head.m_set.add('\r');
                // CSA: exclude BOL and EOF from character classes
                head.m_set.add(m_spec.BOL);
                head.m_set.add(m_spec.EOF);
                head.m_set.complement();
            }
            else
            {
                // Bracketed class: step past the opening token.
                m_lexGen.advance();

                if (CLexGen.AT_BOL == m_spec.m_current_token)
                {
                    // Leading '^' token: build the class on a complemented set.
                    m_lexGen.advance();
                    // CSA: exclude BOL and EOF from character classes
                    head.m_set.add(m_spec.BOL);
                    head.m_set.add(m_spec.EOF);
                    head.m_set.complement();
                }

                if (CLexGen.CCL_END != m_spec.m_current_token)
                {
                    dodash(head.m_set);
                }
                /*else
                 *   {
                 *     for (c = 0; c <= ' '; ++c)
                 *       {
                 *         start.m_set.add((byte) c);
                 *       }
                 *   }*/
            }

            // Consume the last token of the class-style term.
            m_lexGen.advance();
        }
        else
        {
            // Plain literal: the lexeme itself labels the edge.
            head.m_edge = m_spec.m_lexeme;
            m_lexGen.advance();
        }
    }

#if (DESCENT_DEBUG)
    CUtility.leave("term", m_spec.m_lexeme, m_spec.m_current_token);
#endif
}
/***************************************************************
 * Function: rule
 * Description: Recursive descent regular expression parser.
 * Parses one complete rule: an optional beginning-of-line
 * anchor, an expression, and an optional end-of-line anchor.
 * The final state receives the packed accept action and the
 * anchor flags.  Returns the start state of the rule's NFA.
 **************************************************************/
private CNfa rule()
{
    CNfaPair pair;
    CNfa start;
    CNfa end;
    int anchor = CSpec.NONE;

#if (DESCENT_DEBUG)
    CUtility.enter("rule", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    pair = CAlloc.newCNfaPair();

    if (CLexGen.AT_BOL == m_spec.m_current_token)
    {
        anchor = anchor | CSpec.START;
        m_lexGen.advance();
        expr(pair);

        // CSA: fixed beginning-of-line operator. 8-aug-1999
        // A new state with a BOL edge is placed in front of the expression.
        start = CAlloc.newCNfa(m_spec);
        start.m_edge = m_spec.BOL;
        start.m_next = pair.m_start;
        end = pair.m_end;
    }
    else
    {
        expr(pair);
        start = pair.m_start;
        end = pair.m_end;
    }

    /* Check for null rules. Charles Fischer found this bug. [CSA]
     * The check has to precede the end-of-line handling below,
     * which dereferences end; otherwise an empty expression
     * followed by '$' fails on a null reference instead of
     * reporting E_ZERO. */
    if (end == null)
    {
        CError.parse_error(CError.E_ZERO, m_input.m_line_number);
    }

    if (CLexGen.AT_EOL == m_spec.m_current_token)
    {
        m_lexGen.advance();

        // CSA: fixed end-of-line operator. 8-aug-1999
        // After the expression, branch to either a newline fragment
        // or a single EOF edge; both finish in nlpair.m_end.
        CNfaPair nlpair = CAlloc.newNLPair(m_spec);
        end.m_next = CAlloc.newCNfa(m_spec);
        end.m_next.m_next = nlpair.m_start;
        end.m_next.m_next2 = CAlloc.newCNfa(m_spec);
        end.m_next.m_next2.m_edge = m_spec.EOF;
        end.m_next.m_next2.m_next = nlpair.m_end;
        end = nlpair.m_end;
        anchor = anchor | CSpec.END;
    }

    /* Handle end of regular expression.  See page 103. */
    end.m_accept = m_lexGen.packAccept();
    end.m_anchor = anchor;

    /* Begin: Removed for states. */
    /*m_lexGen.advance();*/
    /* End: Removed for states. */

#if (DESCENT_DEBUG)
    CUtility.leave("rule", m_spec.m_lexeme, m_spec.m_current_token);
#endif

    return start;
}