/***************************************************************
 * Function: emit
 * Description: Entry point of the emitter. Binds the given spec
 *   and output writer, runs the header, construct, helpers,
 *   driver and footer emitters in that order, and finally
 *   clears the bound members again.
 **************************************************************/
public void emit(CSpec spec, System.IO.TextWriter outstream)
{
    Set(spec, outstream);

#if (DEBUG)
    CUtility.ASSERT(null != m_spec);
    CUtility.ASSERT(null != m_outstream);
#endif

    // Each stage writes its part of the generated output.
    emit_header();
    emit_construct();
    emit_helpers();
    emit_driver();
    emit_footer();

    // Drop the references taken by Set().
    reset();
}
/***************************************************************
 * Function: reset
 * Description: Drops the references to the spec and the output
 *   writer by setting both members to null.
 **************************************************************/
private void reset()
{
    m_outstream = null;
    m_spec = null;
}
private int mapped_charset_size; // reduced charset size

/***************************************************************
 * Function: simplify
 * Description: Computes the character classes for the given spec
 *   and rewrites every NFA edge in terms of those classes. The
 *   class map and the reduced column count are then stored back
 *   into the spec.
 **************************************************************/
public void simplify(CSpec m_spec)
{
    // Fills in ccls and mapped_charset_size.
    computeClasses(m_spec);

    // Re-label every NFA edge through the class map.
    IEnumerator states = m_spec.m_nfa_states.elements();
    while (states.MoveNext())
    {
        CNfa state = (CNfa)states.Current;

        bool carriesNoCharacter =
            state.m_edge == CNfa.EMPTY || state.m_edge == CNfa.EPSILON;

        if (!carriesNoCharacter)
        {
            if (state.m_edge == CNfa.CCL)
            {
                // Character-set edge: build the mapped set and swap it in.
                CSet mapped = new CSet();
                mapped.map(state.m_set, ccls);
                state.m_set = mapped;
            }
            else
            {
                // Single-character edge: replace the character by its class.
                state.m_edge = ccls[state.m_edge];
            }
        }
    }

    // Publish the mapping on the spec.
    m_spec.m_ccls_map = ccls;
    m_spec.m_dtrans_ncols = mapped_charset_size;
}
/***************************************************************
 * Function: newNLPair
 * Description: Returns a new CNfaPair that matches one line
 *   terminator: (\r\n?|[\n\u2028\u2029])
 *   U+2028 and U+2029 are only added when the character set of
 *   the spec (m_dtrans_ncols minus the pseudo characters) is
 *   large enough to contain them.
 *   Added by CSA 8-Aug-1999, updated 10-Aug-1999
 * Parameters: spec - spec that receives the newly created NFA
 *   states (via newCNfa).
 * Returns: the pair whose m_start/m_end delimit the sub-machine.
 **************************************************************/
public static CNfaPair newNLPair(CSpec spec)
{
    // Unicode code points are hexadecimal: U+2028 is 0x2028, not
    // decimal 2028 (which would be U+07EC).
    const int LINE_SEPARATOR = 0x2028;      /* U+2028 is LS, the line separator */
    const int PARAGRAPH_SEPARATOR = 0x2029; /* U+2029 is PS, the paragraph sep. */

    // NOTE: the order of the newCNfa calls is kept exactly as before,
    // because each call appends the state to spec.m_nfa_states.
    CNfaPair pair = newCNfaPair();
    pair.m_end = newCNfa(spec);   // newline accepting state
    pair.m_start = newCNfa(spec); // new state with two epsilon edges

    // First branch: a character class holding '\n' (and LS/PS if available).
    CNfa singleChar = newCNfa(spec);
    pair.m_start.m_next = singleChar;
    singleChar.m_edge = CNfa.CCL;
    singleChar.m_set = new CSet();
    singleChar.m_set.add('\n');
    if (spec.m_dtrans_ncols - CSpec.NUM_PSEUDO > PARAGRAPH_SEPARATOR)
    {
        singleChar.m_set.add(LINE_SEPARATOR);
        singleChar.m_set.add(PARAGRAPH_SEPARATOR);
    }
    singleChar.m_next = pair.m_end; // accept '\n', U+2028, or U+2029

    // Second branch: '\r' optionally followed by '\n'.
    CNfa carriageReturn = newCNfa(spec);
    pair.m_start.m_next2 = carriageReturn;
    carriageReturn.m_edge = '\r';

    CNfa afterCarriageReturn = newCNfa(spec);
    carriageReturn.m_next = afterCarriageReturn;
    afterCarriageReturn.m_next = pair.m_end; // accept '\r';

    CNfa lineFeed = newCNfa(spec);
    afterCarriageReturn.m_next2 = lineFeed;
    lineFeed.m_edge = '\n';
    lineFeed.m_next = pair.m_end; // accept '\r\n';

    return pair;
}
/***************************************************************
 * Function: reset
 * Description: Sets the spec, group and ingroup members back to
 *   null.
 **************************************************************/
private void reset()
{
    m_ingroup = null;
    m_group = null;
    m_spec = null;
}
/***************************************************************
 * Function: reset
 * Description: Clears the lexGen and spec references and sets
 *   the unmarked-DFA index back to zero.
 **************************************************************/
private void reset()
{
    m_unmarked_dfa = 0;
    m_spec = null;
    m_lexGen = null;
}
/***************************************************************
 * Function: reset
 * Description: Clears the CMakeNfa members (spec, lexGen and
 *   input) by setting them to null.
 **************************************************************/
private void reset()
{
    m_spec = null;
    m_lexGen = null;
    m_input = null;
}
/***************************************************************
 * Function: allocate_BOL_EOF
 * Description: Reserves two extra columns at the end of the
 *   character range for the special BOL and EOF characters and
 *   records their indices in the given CSpec.
 **************************************************************/
public void allocate_BOL_EOF(CSpec spec)
{
    // Exactly two pseudo characters are allocated below.
    CUtility.ASSERT(CSpec.NUM_PSEUDO == 2);

    // BOL takes the first free column, EOF the one right after it.
    spec.BOL = spec.m_dtrans_ncols;
    spec.m_dtrans_ncols = spec.m_dtrans_ncols + 1;

    spec.EOF = spec.m_dtrans_ncols;
    spec.m_dtrans_ncols = spec.m_dtrans_ncols + 1;
}
/***************************************************************
 * Function: Set
 * Description: Stores the given lexGen and spec in the member
 *   variables and sets the unmarked-DFA index to zero.
 **************************************************************/
private void Set(CLexGen lexGen, CSpec spec)
{
    m_unmarked_dfa = 0;
    m_spec = spec;
    m_lexGen = lexGen;
}
/*************************************************************
 * Function: CDTrans (constructor)
 * Description: Creates a transition row with the given label.
 *   The row has one entry per column of the spec
 *   (spec.m_dtrans_ncols), no accept action, and the anchor
 *   set to CSpec.NONE.
 ***********************************************************/
public CDTrans(int label, CSpec spec)
{
    m_label = label;
    m_anchor = CSpec.NONE;
    m_accept = null;
    m_dtrans = new int[spec.m_dtrans_ncols];
}
/***************************************************************
 * Function: newCDfa
 * Description: Creates a CDfa constructed with the current size
 *   of spec.m_dfa_states, appends it to that collection and
 *   returns it.
 **************************************************************/
public static CDfa newCDfa(CSpec spec)
{
    // The current element count is passed to the constructor
    // before the new state is appended.
    CDfa created = new CDfa(spec.m_dfa_states.size());
    spec.m_dfa_states.addElement(created);
    return created;
}
/***************************************************************
 * Function: thompson
 * Description: Entry point of the NFA builder. Allocates one
 *   rule list per entry of m_spec.m_states, builds the NFA via
 *   machine(), labels every NFA state with its index, and
 *   leaves the result in the given CSpec.
 **************************************************************/
public void thompson(CLexGen lexGen, CSpec spec, CInput input)
{
    /* Bind member variables for the duration of the call. */
    reset();
    Set(lexGen, spec, input);

    /* One (initially empty) rule vector per entry of m_states. */
    int ruleListCount = m_spec.m_states.Count;
    m_spec.m_state_rules = new Vector[ruleListCount];
    for (int slot = 0; slot < ruleListCount; ++slot)
    {
        m_spec.m_state_rules[slot] = new Vector();
    }

    /* Build the NFA and remember its start state. */
    m_spec.m_nfa_start = machine();

    /* Label each state with its position in m_nfa_states. */
    int stateCount = m_spec.m_nfa_states.size();
    for (int index = 0; index < stateCount; ++index)
    {
        CNfa state = (CNfa)m_spec.m_nfa_states.elementAt(index);
        state.m_label = index;
    }

    /* Debugging output. */
#if (DO_DEBUG)
    m_lexGen.print_nfa();
#endif

    if (m_spec.m_verbose)
    {
        System.Console.WriteLine("NFA comprised of "
            + (m_spec.m_nfa_states.Count + 1)
            + " states.");
    }

    reset();
}
/***************************************************************
 * Function: Set
 * Description: Stores the given spec and clears the group and
 *   ingroup members. In DEBUG builds the spec is asserted to be
 *   non-null first.
 **************************************************************/
private void Set(CSpec spec)
{
#if (DEBUG)
    CUtility.ASSERT(null != spec);
#endif

    m_ingroup = null;
    m_group = null;
    m_spec = spec;
}
/***************************************************************
 * Function: min_dfa
 * Description: Entry point of the minimizer. Binds the spec,
 *   runs minimize() followed by reduce(), then clears the
 *   member variables again.
 **************************************************************/
public void min_dfa(CSpec spec)
{
    Set(spec);

    /* Step 1: remove redundant states. */
    minimize();

    /* Step 2: column and row compression; accept states are
     * saved in an auxiliary vector. */
    reduce();

    reset();
}
/***************************************************************
 * Function: Set
 * Description: Stores the given spec and output writer in the
 *   member variables. In DEBUG builds both are asserted to be
 *   non-null first.
 **************************************************************/
private void Set(CSpec spec, TextWriter outstream)
{
#if (DEBUG)
    CUtility.ASSERT(null != spec);
    CUtility.ASSERT(null != outstream);
#endif

    m_outstream = outstream;
    m_spec = spec;
}
/***************************************************************
 * Function: newCNfa
 * Description: Creates a CNfa state whose edge is EPSILON,
 *   appends it to spec.m_nfa_states and returns it. The label
 *   is not assigned here.
 **************************************************************/
public static CNfa newCNfa(CSpec spec)
{
    /* UNDONE: Buffer this? */
    CNfa state = new CNfa();
    state.m_edge = CNfa.EPSILON;

    spec.m_nfa_states.addElement(state);
    return state;
}
/***************************************************************
 * Function: Set
 * Description: Stores the given lexGen, spec and input in the
 *   CMakeNfa member variables. In DEBUG builds all three are
 *   asserted to be non-null first.
 **************************************************************/
private void Set(CLexGen lexGen, CSpec spec, CInput input)
{
#if (DEBUG)
    CUtility.ASSERT(null != input);
    CUtility.ASSERT(null != lexGen);
    CUtility.ASSERT(null != spec);
#endif

    m_spec = spec;
    m_lexGen = lexGen;
    m_input = input;
}
/***************************************************************
 * Function: make_dfa
 * Description: Entry point of the NFA-to-DFA conversion. Binds
 *   lexGen and spec, builds the transition table, then releases
 *   the NFA states and afterwards the DFA states.
 **************************************************************/
public void make_dfa(CLexGen lexGen, CSpec spec)
{
    reset();
    Set(lexGen, spec);

    make_dtrans();
    free_nfa_states();

    // Report the state count only when verbose output and the
    // old dump-debug switch are both on.
    if (m_spec.m_verbose)
    {
        if (CUtility.OLD_DUMP_DEBUG)
        {
            System.Console.WriteLine(m_spec.m_dfa_states.size()
                + " DFA states in original machine.");
        }
    }

    free_dfa_states();
}
/** Work out the smallest set of character classes that still lets
 * every NFA edge be told apart. All characters start out in class 0;
 * each edge is then examined in turn, and any class that has members
 * both on and off that edge is split in two.  [CSA, 25-Jul-1999]
 *
 * Results are left in ccls (character -> class) and
 * mapped_charset_size (number of classes).
 */
private void computeClasses(CSpec m_spec)
{
    this.original_charset_size = m_spec.m_dtrans_ncols;
    this.ccls = new int[original_charset_size]; // new int[] is zero-filled: all in class 0.

    int classCount = 1;
    SparseBitSet onEdge = new SparseBitSet();
    SparseBitSet offEdge = new SparseBitSet();
    Hashtable renamed = new Hashtable();

    if (m_spec.m_verbose)
    {
        System.Console.Write("Working on character classes.");
    }

    IEnumerator states = m_spec.m_nfa_states.elements();
    while (states.MoveNext())
    {
        CNfa nfa = (CNfa)states.Current;

        // Edges without a character tell us nothing.
        if (nfa.m_edge == CNfa.EMPTY)
        {
            continue;
        }
        if (nfa.m_edge == CNfa.EPSILON)
        {
            continue;
        }

        // Record which classes occur on the edge and which occur off it.
        onEdge.clearAll();
        offEdge.clearAll();
        for (int ch = 0; ch < ccls.Length; ch++)
        {
            bool matched =
                nfa.m_edge == ch                                       // single-character edge
                || (nfa.m_edge == CNfa.CCL && nfa.m_set.contains(ch)); // character-set edge
            if (matched)
            {
                onEdge.Set(ccls[ch]);
            }
            else
            {
                offEdge.Set(ccls[ch]);
            }
        }

        // Classes present on both sides are the ones that must be split.
        onEdge.and(offEdge);
        bool nothingToSplit = onEdge.size() == 0;
        if (m_spec.m_verbose)
        {
            System.Console.Write(nothingToSplit ? "." : ":");
        }
        if (nothingToSplit)
        {
            continue;
        }

        // Move the on-edge members of every split class into a new class.
        renamed.Clear(); // old class -> new class, for this edge only
        for (int ch = 0; ch < ccls.Length; ch++)
        {
            if (!onEdge.Get(ccls[ch]))
            {
                continue; // class is not being split
            }

            bool matched =
                nfa.m_edge == ch
                || (nfa.m_edge == CNfa.CCL && nfa.m_set.contains(ch));
            if (!matched)
            {
                continue; // off-edge members keep the old class
            }

            int oldClass = ccls[ch];
            if (!renamed.ContainsKey(oldClass))
            {
                renamed.Add(oldClass, (classCount++)); // allocate a new class
            }
            ccls[ch] = (int)renamed[oldClass];
        }
    }

    if (m_spec.m_verbose)
    {
        System.Console.WriteLine();
        System.Console.WriteLine("NFA has " + classCount + " distinct character classes.");
    }

    this.mapped_charset_size = classCount;
}