/* Size of the character set after it has been reduced by the class mapping. */
static private int mapped_charset_size;

/*
 * Function: simplify
 * Description: Rewrites the edge labels of every NFA state in spec
 * through the character class map (ccls), then stores that map and the
 * reduced column count on spec.
 */
static internal void simplify(Spec spec)
{
    // Presumably fills in ccls and mapped_charset_size; both are defined
    // outside this block -- confirm against computeClasses.
    computeClasses(spec);

    /* Push every labelled edge through the character class mapping. */
    foreach (Nfa node in spec.nfa_states)
    {
        int edge = node.GetEdge();

        // EMPTY and EPSILON edges carry no character, so they stay as they are.
        if (edge != Nfa.EMPTY && edge != Nfa.EPSILON)
        {
            if (edge == Nfa.CCL)
            {
                // Character-class edge: swap in a fresh set built from the
                // old one via the mapping.
                CharSet mapped = new CharSet();
                mapped.map(node.GetCharSet(), ccls);
                node.SetCharSet(mapped);
            }
            else
            {
                // Single-character edge: replace the label by its mapped value.
                node.SetEdge(ccls[edge]);
            }
        }
    }

    /* Publish the mapping and the reduced column count on the spec. */
    spec.ccls_map = ccls;
    spec.dtrans_ncols = mapped_charset_size;
}
/*
 * Function: rule
 * Description: Recursive descent regular expression parser.
 *
 * Parses a single rule: an optional beginning-of-line anchor (AT_BOL),
 * an expression (via expr), and an optional end-of-line anchor (AT_EOL).
 * The final state of the resulting NFA fragment receives the accept
 * produced by gen.packAccept() and the accumulated anchor flags.
 *
 * Returns the start state of the fragment.
 *
 * An expression that yields no end state is reported through
 * Error.parse_error(Error.E_ZERO, ...) before the end state is used.
 */
private static Nfa rule()
{
    NfaPair pair;
    Nfa start = null;
    Nfa end = null;
    int anchor = Spec.NONE;

#if DESCENT_DEBUG
    Utility.enter("rule", spec.lexeme, spec.current_token);
#endif

    pair = Alloc.NewNfaPair();

    if (Gen.AT_BOL == spec.current_token)
    {
        // Anchored at beginning of line: put a BOL-labelled state in front
        // of the parsed expression.
        anchor = anchor | Spec.START;
        gen.Advance();
        expr(pair);

        start = Alloc.NewNfa(spec);
        start.SetEdge(spec.BOL);
        start.SetNext(pair.start);
        end = pair.end;
    }
    else
    {
        expr(pair);
        start = pair.start;
        end = pair.end;
    }

    /*
     * Check for null rules. This must happen before the end-of-line
     * handling below, which dereferences end; otherwise an empty rule
     * followed by an end-of-line anchor fails with a null reference
     * instead of the intended parse error.
     */
    if (end == null)
    {
        Error.parse_error(Error.E_ZERO, input.line_number);
    }

    if (Gen.AT_EOL == spec.current_token)
    {
        gen.Advance();

        // Anchored at end of line: after the expression, continue either
        // into the pair from Alloc.NewNLPair or, through a sibling state
        // labelled with spec.EOF, straight to that pair's end.
        NfaPair nlpair = Alloc.NewNLPair(spec);
        end.SetNext(Alloc.NewNfa(spec));
        Nfa enext = end.GetNext();
        enext.SetNext(nlpair.start);
        enext.SetSib(Alloc.NewNfa(spec));
        enext.GetSib().SetEdge(spec.EOF);
        enext.GetSib().SetNext(nlpair.end);
        end = nlpair.end;
        anchor = anchor | Spec.END;
    }

    /* Handle end of regular expression */
    end.SetAccept(gen.packAccept());
    end.SetAnchor(anchor);

#if DESCENT_DEBUG
    Utility.leave("rule", spec.lexeme, spec.current_token);
#endif

    return start;
}
/*
 * Function: machine
 * Description: Recursive descent regular expression parser.
 *
 * Parses rules until END_OF_INPUT is reached. The first rule hangs off
 * the start state; each later rule hangs off a new sibling state chained
 * after the previous one. Every parsed rule is handed to ProcessStates
 * together with the state set read just before it. Finally a pseudo-rule
 * matching BOL or EOF is appended and added to every state's rule list.
 *
 * Returns the start state of the whole machine.
 */
private static Nfa machine()
{
#if DESCENT_DEBUG
    Utility.enter("machine", spec.lexeme, spec.current_token);
#endif

    Nfa head = Alloc.NewNfa(spec);
    Nfa tail = head;

    BitSet active = gen.GetStates();

    /* Begin: Added for states. */
    spec.current_token = Gen.EOS;
    gen.Advance();
    /* End: Added for states. */

    // First rule: attached directly below the start state.
    if (spec.current_token != Gen.END_OF_INPUT)
    {
        Nfa firstRule = rule();
        tail.SetNext(firstRule);
        ProcessStates(active, firstRule);
    }

    // Remaining rules: each one gets its own sibling state.
    while (spec.current_token != Gen.END_OF_INPUT)
    {
        /* State changes are picked up here, before the rule is parsed. */
        active = gen.GetStates();

        /* Begin: Added for states. */
        gen.Advance();
        if (spec.current_token == Gen.END_OF_INPUT)
        {
            break;
        }
        /* End: Added for states. */

        Nfa branch = Alloc.NewNfa(spec);
        tail.SetSib(branch);
        tail = branch;

        Nfa nextRule = rule();
        tail.SetNext(nextRule);
        ProcessStates(active, nextRule);
    }

    /*
     * Add pseudo-rules for BOL and EOF: one more sibling whose successor
     * is a character-class state containing spec.BOL and spec.EOF.
     */
    Nfa holder = Alloc.NewNfa(spec);
    tail.SetSib(holder);
    tail = holder;

    Nfa pseudo = Alloc.NewNfa(spec);
    tail.SetNext(pseudo);
    pseudo.SetEdge(Nfa.CCL);

    Nfa pseudoEnd = Alloc.NewNfa(spec);
    pseudo.SetNext(pseudoEnd);

    pseudo.SetCharSet(new CharSet());
    CharSet anchors = pseudo.GetCharSet();
    anchors.add(spec.BOL);
    anchors.add(spec.EOF);

    // Do-nothing accept rule: the accept is built with a null first argument.
    pseudoEnd.SetAccept(new Accept(null, input.line_number + 1));

    /* Register the pseudo-rule with every state's rule list. */
    int stateCount = spec.states.Count;
    for (int s = 0; s < stateCount; ++s)
    {
        spec.state_rules[s].Add(pseudo);
    }

#if DESCENT_DEBUG
    Utility.leave("machine", spec.lexeme, spec.current_token);
#endif

    return head;
}