static private int mapped_charset_size; // reduced charset size

static internal void simplify(Spec spec)
{
    computeClasses(spec); // initialize fields.

    /*
     * now rewrite the NFA using our character class mapping.
     */
    for (int i = 0; i < spec.nfa_states.Count; i++)
    {
        Nfa nfa = (Nfa)spec.nfa_states[i];
        if (nfa.GetEdge() == Nfa.EMPTY || nfa.GetEdge() == Nfa.EPSILON)
        {
            continue; // no change.
        }
        if (nfa.GetEdge() == Nfa.CCL)
        {
            CharSet nset = new CharSet();
            nset.map(nfa.GetCharSet(), ccls); // map it.
            nfa.SetCharSet(nset);
        }
        else
        {
            // single character
            nfa.SetEdge(ccls[nfa.GetEdge()]); // map it.
        }
    }

    /*
     * now update spec with the mapping.
     */
    spec.ccls_map = ccls;
    spec.dtrans_ncols = mapped_charset_size;
}
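// Illustrative, standalone sketch (not part of the generator): shows what the
// ccls mapping produced by simplify() buys. The 8-character alphabet, the
// class numbers, and the CclsDemo/Main names below are hypothetical; the real
// map is whatever computeClasses() derives from the NFA edges.
using System;

static class CclsDemo
{
    static void Main()
    {
        // Hypothetical class map for an alphabet 'a'..'h':
        // 'a' -> class 1, 'b' and 'c' -> class 2, everything else -> class 0.
        int[] ccls = { 1, 2, 2, 0, 0, 0, 0, 0 };
        int mappedCharsetSize = 3;                  // number of distinct classes

        // A DFA transition row now needs 3 columns instead of 8.
        int[] dtransRow = new int[mappedCharsetSize];
        dtransRow[ccls['b' - 'a']] = 42;            // hypothetical target state for class 2

        // Looking up a transition: index by class, not by raw character.
        char input = 'c';
        int column = ccls[input - 'a'];
        Console.WriteLine($"'{input}' uses column {column}, target state {dtransRow[column]}");
    }
}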
/*
 * Function: move
 * Description: Computes the set of NFA states reachable from the NFA states
 * of the given DFA state on input character b, storing the result in
 * nfa_set / nfa_bit.
 */
public void move(Dfa dfa, int b)
{
    List<Nfa> nfaSet = dfa.GetNFASet();
    this.nfa_set = null;
    this.nfa_bit = null;

    int count = nfaSet.Count;
    for (int i = 0; i < count; i++)
    {
        Nfa nfa = nfaSet[i];

        // follow the edge if it is labeled with b, or with a character
        // class that contains b.
        if (b == (int)nfa.Edge ||
            (Nfa.CCL == nfa.Edge && nfa.GetCharSet().contains(b)))
        {
            if (this.nfa_set == null)
            {
                this.nfa_set = new List<Nfa>();
                this.nfa_bit = new BitSet();
            }

            this.nfa_set.Add(nfa.Next);
            this.nfa_bit.Set(nfa.Next.Label, true);
        }
    }

    if (this.nfa_set != null)
    {
        this.sort_states();
    }
}
/*
 * Function: NewNLPair
 * Description: Builds a start/end NFA pair that matches a newline
 * (sketched in the comment that follows this method).
 */
public static NfaPair NewNLPair(Spec spec)
{
    NfaPair nfaPair = Alloc.NewNfaPair();
    nfaPair.end = Alloc.NewNfa(spec);
    nfaPair.start = Alloc.NewNfa(spec);

    // branch 1: a one-element character class containing '\n'.
    Nfa start = nfaPair.start;
    start.Next = Alloc.NewNfa(spec);
    Nfa next = start.Next;
    next.Edge = Nfa.CCL;
    next.SetCharSet(new CharSet());
    next.GetCharSet().add(10);
    next.Next = nfaPair.end;

    // branch 2: '\r' followed by '\n'.
    start.Sibling = Alloc.NewNfa(spec);
    Nfa sibling = start.Sibling;
    sibling.Edge = '\r';
    sibling.Next = Alloc.NewNfa(spec);
    Nfa next2 = sibling.Next;
    next2.Next = null;
    next2.Sibling = Alloc.NewNfa(spec);
    next2.Sibling.Edge = '\n';
    next2.Sibling.Next = nfaPair.end;

    return(nfaPair);
}
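/*
 * Reading of the NFA built above (a sketch only, assuming Alloc.NewNfa gives
 * new states the default epsilon edge and that Sibling is the second epsilon
 * target, as in the companion code):
 *
 *            eps         CCL{'\n'}
 *   start --------> n1 ------------> end
 *     |
 *     |      eps         '\r'          eps          '\n'
 *     +-----------> n2 --------> n3 --------> n4 --------> end
 *
 * i.e. the pair matches a line terminator written either as '\n' or as the
 * two-character sequence "\r\n".
 */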
/*
 * Function: machine
 * Description: Recursive descent regular expression parser.
 */
private static Nfa machine()
{
    Nfa start = Alloc.NewNfa(MakeNfa.spec);
    Nfa p = start;
    BitSet states = MakeNfa.gen.GetStates();

    MakeNfa.spec.current_token = Tokens.EOS;
    MakeNfa.gen.Advance();

    if (Tokens.END_OF_INPUT != MakeNfa.spec.current_token)
    {
        p.Next = MakeNfa.rule();
        MakeNfa.ProcessStates(states, p.Next);
    }

    while (Tokens.END_OF_INPUT != MakeNfa.spec.current_token)
    {
        states = MakeNfa.gen.GetStates();
        MakeNfa.gen.Advance();
        if (Tokens.END_OF_INPUT == MakeNfa.spec.current_token)
        {
            break;
        }

        p.Sibling = Alloc.NewNfa(MakeNfa.spec);
        p = p.Sibling;
        p.Next = MakeNfa.rule();
        MakeNfa.ProcessStates(states, p.Next);
    }

    /*
     * add pseudo-rules for BOL and EOF.
     */
    p.Sibling = Alloc.NewNfa(MakeNfa.spec);
    p = p.Sibling;
    p.Next = Alloc.NewNfa(MakeNfa.spec);
    Nfa next = p.Next;
    next.Edge = Nfa.CCL;
    next.Next = Alloc.NewNfa(MakeNfa.spec);
    next.SetCharSet(new CharSet());
    next.GetCharSet().add((int)MakeNfa.spec.BOL);
    next.GetCharSet().add((int)MakeNfa.spec.EOF);

    // do-nothing accept rule.
    next.Next.SetAccept(new Accept(null));

    // add the pseudo-rule to every lexical state.
    for (int i = 0; i < MakeNfa.spec.States.Count; i++)
    {
        List<Nfa> list = MakeNfa.spec.state_rules[i];
        list.Add(next);
    }

    return(start);
}
/*
 * Compute the minimum set of character classes needed to distinguish the
 * NFA edges; classes are split incrementally as discriminating edges are
 * found.
 */
private static void computeClasses(Spec spec)
{
    SimplifyNfa.original_charset_size = spec.dtrans_ncols;
    SimplifyNfa.ccls = new char[SimplifyNfa.original_charset_size]; // initially all zero.

    char nextcls = '\u0001';
    BitSet clsA = new BitSet();
    BitSet clsB = new BitSet();
    Dictionary<char, char> h = new Dictionary<char, char>();

    Console.WriteLine("Working on character classes.");
    for (int i = 0; i < spec.nfa_states.Count; i++)
    {
        Nfa nfa = spec.nfa_states[i];
        if (nfa.Edge != Nfa.EMPTY && nfa.Edge != Nfa.EPSILON)
        {
            clsA.ClearAll();
            clsB.ClearAll();
            for (int j = 0; j < SimplifyNfa.ccls.Length; j++)
            {
                if ((int)nfa.Edge == j ||
                    (nfa.Edge == Nfa.CCL && nfa.GetCharSet().contains(j)))
                {
                    clsA.Set((int)SimplifyNfa.ccls[j], true);
                }
                else
                {
                    clsB.Set((int)SimplifyNfa.ccls[j], true);
                }
            }

            // classes that appear on both sides of the edge must be split.
            clsA.And(clsB);
            if (clsA.GetLength() != 0)
            {
                h.Clear(); // maps an old class number to its new (split-off) class number
                for (int k = 0; k < SimplifyNfa.ccls.Length; k++)
                {
                    if (clsA.Get((int)SimplifyNfa.ccls[k]) &&
                        ((int)nfa.Edge == k || (nfa.Edge == Nfa.CCL && nfa.GetCharSet().contains(k))))
                    {
                        char split = SimplifyNfa.ccls[k];
                        if (!h.ContainsKey(split))
                        {
                            h.Add(split, nextcls); // make new class
                            nextcls = (char)(nextcls + '\u0001');
                        }
                        SimplifyNfa.ccls[k] = h[split];
                    }
                }
            }
        }
    }
    SimplifyNfa.mapped_charset_size = (int)nextcls;
}
internal static void simplify(Spec spec)
{
    SimplifyNfa.computeClasses(spec);

    // rewrite the NFA edges using the character class mapping.
    for (int i = 0; i < spec.nfa_states.Count; i++)
    {
        Nfa nfa = spec.nfa_states[i];
        if (nfa.Edge != Nfa.EMPTY && nfa.Edge != Nfa.EPSILON)
        {
            if (nfa.Edge == Nfa.CCL)
            {
                CharSet charSet = new CharSet();
                charSet.map(nfa.GetCharSet(), SimplifyNfa.ccls);
                nfa.SetCharSet(charSet);
            }
            else
            {
                nfa.Edge = SimplifyNfa.ccls[(int)nfa.Edge];
            }
        }
    }

    // record the mapping in the spec.
    spec.ccls_map = SimplifyNfa.ccls;
    spec.dtrans_ncols = SimplifyNfa.mapped_charset_size;
}
/*
 * Compute minimum set of character classes needed to disambiguate
 * edges. We optimistically assume that every character belongs to
 * a single character class, and then incrementally split classes
 * as we see edges that require discrimination between characters in
 * the class.
 */
static private void computeClasses(Spec spec)
{
    original_charset_size = spec.dtrans_ncols;
    ccls = new int[original_charset_size]; // initially all zero.

    int nextcls = 1;
    BitSet clsA = new BitSet();
    BitSet clsB = new BitSet();
    Hashtable h = new Hashtable();

    Console.WriteLine("Working on character classes.");
    for (int index = 0; index < spec.nfa_states.Count; index++)
    {
        Nfa nfa = (Nfa)spec.nfa_states[index];
        if (nfa.GetEdge() == Nfa.EMPTY || nfa.GetEdge() == Nfa.EPSILON)
        {
            continue; // no discriminatory information.
        }

        clsA.ClearAll();
        clsB.ClearAll();
        for (int i = 0; i < ccls.Length; i++)
        {
            if (nfa.GetEdge() == i ||                                     // edge labeled with a character
                nfa.GetEdge() == Nfa.CCL && nfa.GetCharSet().contains(i)) // set of characters
            {
                clsA.Set(ccls[i], true);
            }
            else
            {
                clsB.Set(ccls[i], true);
            }
        }

        /*
         * now figure out which character classes we need to split.
         */
        clsA.And(clsB); // split the classes which show up on both sides of edge
        if (clsA.GetLength() == 0)
        {
            Console.Write(".");
            continue;
        }
        Console.Write(":");

        /*
         * and split them.
         */
        h.Clear(); // h will map old to new class name
        for (int i = 0; i < ccls.Length; i++)
        {
            if (clsA.Get(ccls[i])) // a split class
            {
                if (nfa.GetEdge() == i ||
                    nfa.GetEdge() == Nfa.CCL && nfa.GetCharSet().contains(i))
                {
                    // on A side
                    int split = ccls[i];
                    if (!h.ContainsKey(split))
                    {
                        h.Add(split, nextcls++); // make new class
#if DEBUG
                        Console.WriteLine("Adding char " + (nextcls - 1) + " split=" + split + " i=" + i);
#endif
                    }
                    ccls[i] = (int)h[split];
                }
            }
        }
    }
    Console.WriteLine();
    Console.WriteLine("NFA has " + nextcls + " distinct character classes.");

    mapped_charset_size = nextcls;
}
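/*
 * Worked example (illustrative, not generator output): suppose the only two
 * edges in the NFA are the literal 'a' and the class [a-c].
 *
 *   start:       every character is in class 0, nextcls = 1.
 *   edge 'a':    class 0 appears on both sides of the edge, so it is split:
 *                'a' moves to the new class 1, the rest stay in class 0.
 *   edge [a-c]:  class 0 is split again: 'b' and 'c' move to class 2;
 *                'a' keeps class 1 because class 1 is not on both sides.
 *
 * Result: three classes (0, 1, 2), so mapped_charset_size is 3 and the DFA
 * transition table needs three columns instead of one per character.
 */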
/*
 * Function: machine
 * Description: Recursive descent regular expression parser.
 */
private static Nfa machine()
{
    Nfa start;
    Nfa p;
    BitSet states;

#if DESCENT_DEBUG
    Utility.enter("machine", spec.lexeme, spec.current_token);
#endif

    start = Alloc.NewNfa(spec);
    p = start;

    states = gen.GetStates();

    /* Begin: Added for states. */
    spec.current_token = Gen.EOS;
    gen.Advance();
    /* End: Added for states. */

    if (Gen.END_OF_INPUT != spec.current_token)
    {
        p.SetNext(rule());
        ProcessStates(states, p.GetNext());
    }

    while (Gen.END_OF_INPUT != spec.current_token)
    {
        /* Make state changes HERE. */
        states = gen.GetStates();

        /* Begin: Added for states. */
        gen.Advance();
        if (Gen.END_OF_INPUT == spec.current_token)
        {
            break;
        }
        /* End: Added for states. */

        p.SetSib(Alloc.NewNfa(spec));
        p = p.GetSib();
        p.SetNext(rule());
        ProcessStates(states, p.GetNext());
    }

    /*
     * add pseudo-rules for BOL and EOF
     */
    p.SetSib(Alloc.NewNfa(spec));
    p = p.GetSib();
    p.SetNext(Alloc.NewNfa(spec));
    Nfa pnext = p.GetNext();
    pnext.SetEdge(Nfa.CCL);
    pnext.SetNext(Alloc.NewNfa(spec));
    pnext.SetCharSet(new CharSet());
    pnext.GetCharSet().add(spec.BOL);
    pnext.GetCharSet().add(spec.EOF);

    // do-nothing accept rule
    pnext.GetNext().SetAccept(new Accept(null, input.line_number + 1));

    /* add the pseudo rules */
    for (int i = 0; i < spec.states.Count; i++)
    {
        ArrayList srule = spec.state_rules[i];
        srule.Add(pnext);
    }

#if DESCENT_DEBUG
    Utility.leave("machine", spec.lexeme, spec.current_token);
#endif

    return(start);
}
/*
 * Function: term
 * Description: Parses a single term of a regular expression: a parenthesized
 * sub-expression, a literal character, the '.' wildcard, or a character class.
 */
private static void term(NfaPair pair)
{
    if (Tokens.OPEN_PAREN == MakeNfa.spec.current_token)
    {
        MakeNfa.gen.Advance();
        MakeNfa.expr(pair);
        if (Tokens.CLOSE_PAREN == MakeNfa.spec.current_token)
        {
            MakeNfa.gen.Advance();
            return;
        }
        Error.ParseError(Errors.SYNTAX, MakeNfa.gen.InputFilePath, MakeNfa.input.line_number);
        return;
    }

    Nfa nfa = Alloc.NewNfa(MakeNfa.spec);
    pair.start = nfa;
    nfa.Next = Alloc.NewNfa(MakeNfa.spec);
    pair.end = nfa.Next;

    bool isLetter = MakeNfa.spec.current_token == Tokens.LETTER
                    && char.IsLetter(MakeNfa.spec.current_token_value);

    // single literal character: no character class needed.
    if (MakeNfa.spec.current_token != Tokens.ANY
        && MakeNfa.spec.current_token != Tokens.CCL_START
        && (!MakeNfa.spec.IgnoreCase || !isLetter))
    {
        nfa.Edge = MakeNfa.spec.current_token_value;
        MakeNfa.gen.Advance();
        return;
    }

    // otherwise build a character-class edge.
    nfa.Edge = Nfa.CCL;
    nfa.SetCharSet(new CharSet());
    CharSet charSet = nfa.GetCharSet();

    if (MakeNfa.spec.IgnoreCase && isLetter)
    {
        // case-insensitive letter: add both cases.
        charSet.addncase(MakeNfa.spec.current_token_value);
    }
    else if (MakeNfa.spec.current_token == Tokens.ANY)
    {
        // '.' matches everything except newline characters and the BOL/EOF markers.
        charSet.add(10);
        charSet.add(13);
        charSet.add((int)MakeNfa.spec.BOL);
        charSet.add((int)MakeNfa.spec.EOF);
        charSet.complement();
    }
    else
    {
        MakeNfa.gen.Advance();
        if (MakeNfa.spec.current_token == Tokens.CHAR_CLASS)
        {
            MakeNfa.gen.Advance();
            if (!charSet.AddClass(MakeNfa.spec.class_name.ToLower()))
            {
                Error.ParseError(Errors.InvalidCharClass, MakeNfa.gen.InputFilePath, MakeNfa.input.line_number);
            }
        }
        else if (MakeNfa.spec.current_token == Tokens.AT_BOL)
        {
            // a leading '^' negates the class.
            MakeNfa.gen.Advance();
            charSet.add((int)MakeNfa.spec.BOL);
            charSet.add((int)MakeNfa.spec.EOF);
            charSet.complement();
        }

        if (MakeNfa.spec.current_token != Tokens.CCL_END)
        {
            MakeNfa.dodash(charSet);
        }
    }

    MakeNfa.gen.Advance();
}