private static int mapped_charset_size; // reduced charset size

/*
 * Function: simplify
 * Description: Calls computeClasses(spec), then rewrites every labelled
 * edge in spec.nfa_states through the ccls mapping, and finally stores
 * ccls and mapped_charset_size on spec.
 */
internal static void simplify(Spec spec)
{
    computeClasses(spec); // initialize fields.

    /*
     * Rewrite the NFA using the character class mapping.
     */
    for (int index = 0; index < spec.nfa_states.Count; index++)
    {
        Nfa state = (Nfa)spec.nfa_states[index];

        bool unlabelled = state.GetEdge() == Nfa.EMPTY
            || state.GetEdge() == Nfa.EPSILON;
        if (!unlabelled)
        {
            if (state.GetEdge() != Nfa.CCL)
            {
                // single character: replace it with its mapped value.
                state.SetEdge(ccls[state.GetEdge()]);
            }
            else
            {
                // character class: build the mapped set and install it.
                CharSet mapped = new CharSet();
                mapped.map(state.GetCharSet(), ccls);
                state.SetCharSet(mapped);
            }
        }
    }

    /*
     * Record the mapping on the spec.
     */
    spec.ccls_map = ccls;
    spec.dtrans_ncols = mapped_charset_size;
}
/*
 * Function: NewNLPair
 * Description: Allocates an NfaPair whose start node has two ways of
 * reaching the end node:
 *   1. start.Next is a character-class edge whose set contains 10,
 *      leading straight to end;
 *   2. start.Sibling is a '\r' edge to an intermediate node whose own
 *      Next is null and whose Sibling is a '\n' edge leading to end.
 * Presumably this recognises a line terminator ("\n" or "\r\n") --
 * confirm against the callers.
 * The Alloc.NewNfa calls are kept in their original order.
 */
public static NfaPair NewNLPair(Spec spec)
{
    NfaPair nfaPair = Alloc.NewNfaPair();
    nfaPair.end = Alloc.NewNfa(spec);
    nfaPair.start = Alloc.NewNfa(spec);

    // Path 1: character-class edge containing 10.
    Nfa start = nfaPair.start;
    start.Next = Alloc.NewNfa(spec);
    Nfa classNode = start.Next;
    // NOTE(review): the literal originally assigned here was unreadable
    // (empty/garbled char literal). A char set is attached on the next
    // line, so the character-class marker is used -- confirm Nfa.CCL
    // exists in this build and equals the original value.
    classNode.Edge = Nfa.CCL;
    classNode.SetCharSet(new CharSet());
    classNode.GetCharSet().add(10);
    classNode.Next = nfaPair.end;

    // Path 2: '\r' edge to an intermediate node.
    start.Sibling = Alloc.NewNfa(spec);
    Nfa carriageReturn = start.Sibling;
    carriageReturn.Edge = '\r';
    carriageReturn.Next = Alloc.NewNfa(spec);

    // The intermediate node has no Next of its own; only its Sibling
    // ('\n') reaches the end node.
    Nfa afterReturn = carriageReturn.Next;
    afterReturn.Next = null;
    afterReturn.Sibling = Alloc.NewNfa(spec);
    afterReturn.Sibling.Edge = '\n';
    afterReturn.Sibling.Next = nfaPair.end;

    return nfaPair;
}
/*
 * Function: machine
 * Description: Builds a sibling-linked chain of NFA nodes, one per call
 * to MakeNfa.rule(), until the current token is Tokens.END_OF_INPUT, then
 * appends one extra pseudo-rule node and returns the head of the chain.
 * The pseudo-rule is a character-class edge over spec.BOL and spec.EOF
 * that leads to a node carrying `new Accept(null)`; it is added to
 * spec.state_rules[i] for every i below spec.States.Count.
 */
private static Nfa machine()
{
    Nfa head = Alloc.NewNfa(MakeNfa.spec);
    Nfa tail = head;

    BitSet states = MakeNfa.gen.GetStates();
    MakeNfa.spec.current_token = Tokens.EOS;
    MakeNfa.gen.Advance();

    // First rule hangs off the head node itself.
    if (Tokens.END_OF_INPUT != MakeNfa.spec.current_token)
    {
        tail.Next = MakeNfa.rule();
        MakeNfa.ProcessStates(states, tail.Next);
    }

    // Every further rule gets a fresh sibling node.
    while (Tokens.END_OF_INPUT != MakeNfa.spec.current_token)
    {
        states = MakeNfa.gen.GetStates();
        MakeNfa.gen.Advance();
        if (Tokens.END_OF_INPUT == MakeNfa.spec.current_token)
        {
            break;
        }

        tail.Sibling = Alloc.NewNfa(MakeNfa.spec);
        tail = tail.Sibling;
        tail.Next = MakeNfa.rule();
        MakeNfa.ProcessStates(states, tail.Next);
    }

    // Pseudo-rule for BOL and EOF.
    tail.Sibling = Alloc.NewNfa(MakeNfa.spec);
    tail = tail.Sibling;
    tail.Next = Alloc.NewNfa(MakeNfa.spec);

    Nfa pseudoRule = tail.Next;
    // NOTE(review): the literal originally assigned here was unreadable
    // (empty/garbled char literal). A char set is attached just below,
    // so the character-class marker is used -- confirm Nfa.CCL exists
    // in this build and equals the original value.
    pseudoRule.Edge = Nfa.CCL;
    pseudoRule.Next = Alloc.NewNfa(MakeNfa.spec);
    pseudoRule.SetCharSet(new CharSet());
    pseudoRule.GetCharSet().add((int)MakeNfa.spec.BOL);
    pseudoRule.GetCharSet().add((int)MakeNfa.spec.EOF);
    pseudoRule.Next.SetAccept(new Accept(null));

    // Register the pseudo-rule with every state's rule list.
    for (int i = 0; i < MakeNfa.spec.States.Count; i++)
    {
        List<Nfa> rules = MakeNfa.spec.state_rules[i];
        rules.Add(pseudoRule);
    }

    return head;
}
/*
 * Function: simplify
 * Description: Calls SimplifyNfa.computeClasses(spec), then rewrites the
 * edge of every node in spec.nfa_states through SimplifyNfa.ccls, and
 * finally stores SimplifyNfa.ccls and SimplifyNfa.mapped_charset_size
 * on spec (ccls_map and dtrans_ncols).
 */
internal static void simplify(Spec spec)
{
    SimplifyNfa.computeClasses(spec);

    for (int i = 0; i < spec.nfa_states.Count; i++)
    {
        Nfa nfa = spec.nfa_states[i];

        // NOTE(review): the three char literals originally compared here
        // were unreadable (garbled/empty). They are replaced with the
        // named edge markers: two "skip" markers and the character-class
        // marker -- confirm these constants exist in this build and
        // carry the original values.
        if (nfa.Edge == Nfa.EMPTY || nfa.Edge == Nfa.EPSILON)
        {
            continue; // edge carries no character; nothing to map.
        }

        if (nfa.Edge == Nfa.CCL)
        {
            // Character class: build the mapped set and install it.
            CharSet charSet = new CharSet();
            charSet.map(nfa.GetCharSet(), SimplifyNfa.ccls);
            nfa.SetCharSet(charSet);
        }
        else
        {
            // Single character: replace it with its mapped value.
            nfa.Edge = SimplifyNfa.ccls[(int)nfa.Edge];
        }
    }

    spec.ccls_map = SimplifyNfa.ccls;
    spec.dtrans_ncols = SimplifyNfa.mapped_charset_size;
}
/*
 * Function: machine
 * Description: Recursive descent regular expression parser.
 * Chains one NFA node per rule() result through sibling links, appends
 * the BOL/EOF pseudo-rule, and returns the first node of the chain.
 */
private static Nfa machine()
{
#if DESCENT_DEBUG
    Utility.enter("machine", spec.lexeme, spec.current_token);
#endif

    Nfa head = Alloc.NewNfa(spec);
    Nfa tail = head;
    BitSet activeStates = gen.GetStates();

    /* Begin: Added for states. */
    spec.current_token = Gen.EOS;
    gen.Advance();
    /* End: Added for states. */

    // The first rule is attached to the head node directly.
    if (Gen.END_OF_INPUT != spec.current_token)
    {
        tail.SetNext(rule());
        ProcessStates(activeStates, tail.GetNext());
    }

    // Each following rule is attached to a newly allocated sibling.
    while (Gen.END_OF_INPUT != spec.current_token)
    {
        /* Make state changes HERE. */
        activeStates = gen.GetStates();

        /* Begin: Added for states. */
        gen.Advance();
        if (Gen.END_OF_INPUT == spec.current_token)
        {
            break;
        }
        /* End: Added for states. */

        tail.SetSib(Alloc.NewNfa(spec));
        tail = tail.GetSib();
        tail.SetNext(rule());
        ProcessStates(activeStates, tail.GetNext());
    }

    /*
     * add pseudo-rules for BOL and EOF
     */
    tail.SetSib(Alloc.NewNfa(spec));
    tail = tail.GetSib();
    tail.SetNext(Alloc.NewNfa(spec));

    Nfa pseudoRule = tail.GetNext();
    pseudoRule.SetEdge(Nfa.CCL);
    pseudoRule.SetNext(Alloc.NewNfa(spec));
    pseudoRule.SetCharSet(new CharSet());
    pseudoRule.GetCharSet().add(spec.BOL);
    pseudoRule.GetCharSet().add(spec.EOF);

    // do-nothing accept rule
    pseudoRule.GetNext().SetAccept(new Accept(null, input.line_number + 1));

    /* add the pseudo rules */
    for (int stateIndex = 0; stateIndex < spec.states.Count; stateIndex++)
    {
        ArrayList rulesForState = spec.state_rules[stateIndex];
        rulesForState.Add(pseudoRule);
    }

#if DESCENT_DEBUG
    Utility.leave("machine", spec.lexeme, spec.current_token);
#endif

    return head;
}
/*
 * Function: term
 * Description: Fills in pair for a single term.
 *   - If the current token is Tokens.OPEN_PAREN, the token is consumed,
 *     MakeNfa.expr(pair) is called, and a Tokens.CLOSE_PAREN is expected;
 *     anything else is reported as Errors.SYNTAX.
 *   - Otherwise two nodes are allocated (pair.start -> pair.end) and the
 *     start node's edge is either the current token value itself or a
 *     character-class edge (for Tokens.ANY, Tokens.CCL_START, or a letter
 *     when spec.IgnoreCase is set).
 */
private static void term(NfaPair pair)
{
    if (Tokens.OPEN_PAREN == MakeNfa.spec.current_token)
    {
        MakeNfa.gen.Advance();
        MakeNfa.expr(pair);

        if (Tokens.CLOSE_PAREN == MakeNfa.spec.current_token)
        {
            MakeNfa.gen.Advance();
            return;
        }

        Error.ParseError(Errors.SYNTAX, MakeNfa.gen.InputFilePath, MakeNfa.input.line_number);
        return;
    }

    Nfa nfa = Alloc.NewNfa(MakeNfa.spec);
    pair.start = nfa;
    nfa.Next = Alloc.NewNfa(MakeNfa.spec);
    pair.end = nfa.Next;

    bool isLetter = MakeNfa.spec.current_token == Tokens.LETTER
        && char.IsLetter(MakeNfa.spec.current_token_value);

    // Plain single-character edge: not ANY, not a class start, and not a
    // letter that has to be matched case-insensitively.
    if (MakeNfa.spec.current_token != Tokens.ANY
        && MakeNfa.spec.current_token != Tokens.CCL_START
        && (!MakeNfa.spec.IgnoreCase || !isLetter))
    {
        nfa.Edge = MakeNfa.spec.current_token_value;
        MakeNfa.gen.Advance();
        return;
    }

    // NOTE(review): the literal originally assigned here was unreadable
    // (empty/garbled char literal). A char set is attached on the next
    // line, so the character-class marker is used -- confirm Nfa.CCL
    // exists in this build and equals the original value.
    nfa.Edge = Nfa.CCL;
    nfa.SetCharSet(new CharSet());
    CharSet charSet = nfa.GetCharSet();

    if (MakeNfa.spec.IgnoreCase && isLetter)
    {
        charSet.addncase(MakeNfa.spec.current_token_value);
    }
    else if (MakeNfa.spec.current_token == Tokens.ANY)
    {
        // Add 10, 13, BOL and EOF, then complement the set.
        charSet.add(10);
        charSet.add(13);
        charSet.add((int)MakeNfa.spec.BOL);
        charSet.add((int)MakeNfa.spec.EOF);
        charSet.complement();
    }
    else
    {
        // Tokens.CCL_START: consume it and inspect what follows.
        MakeNfa.gen.Advance();

        if (MakeNfa.spec.current_token == Tokens.CHAR_CLASS)
        {
            MakeNfa.gen.Advance();
            // Invariant lower-casing so the class-name lookup does not
            // depend on the current culture.
            if (!charSet.AddClass(MakeNfa.spec.class_name.ToLowerInvariant()))
            {
                Error.ParseError(Errors.InvalidCharClass, MakeNfa.gen.InputFilePath, MakeNfa.input.line_number);
            }
        }
        else if (MakeNfa.spec.current_token == Tokens.AT_BOL)
        {
            // Add BOL and EOF, then complement the set.
            MakeNfa.gen.Advance();
            charSet.add((int)MakeNfa.spec.BOL);
            charSet.add((int)MakeNfa.spec.EOF);
            charSet.complement();
        }

        if (MakeNfa.spec.current_token != Tokens.CCL_END)
        {
            MakeNfa.dodash(charSet);
        }
    }

    MakeNfa.gen.Advance();
}