/// <summary>
/// Produces a utf8 <see cref="Automaton"/> that is equivalent to the supplied
/// utf32 one.
/// <para/>
/// The input is not required to be deterministic. The result is explicitly
/// flagged as non-deterministic, so callers that need a deterministic
/// automaton must determinize it themselves.
/// </summary>
/// <param name="utf32">The utf32 automaton to convert. A singleton automaton
/// is replaced by its expanded clone before conversion.</param>
/// <returns>A newly created utf8 automaton whose numbered states are the
/// states allocated during this call.</returns>
public Automaton Convert(Automaton utf32)
{
    // Work on the expanded form when the automaton is stored as a singleton.
    if (utf32.IsSingleton)
    {
        utf32 = utf32.CloneExpanded();
    }

    // Indexed by utf32 state number; a null slot means "not visited yet".
    var utf8ByUtf32Number = new State[utf32.GetNumberedStates().Length];

    // Used as a stack of utf32 states whose outgoing edges still need converting.
    var worklist = new JCG.List<State>();
    State utf32Start = utf32.GetInitialState();
    worklist.Add(utf32Start);

    var utf8 = new Automaton();
    utf8.IsDeterministic = false;
    State utf8Start = utf8.GetInitialState();

    // Reset the utf8 state bookkeeping and register the initial state as number 0.
    utf8States = new State[5];
    utf8StateCount = 0;
    utf8Start.number = utf8StateCount;
    utf8States[utf8StateCount++] = utf8Start;

    utf8Start.Accept = utf32Start.Accept;
    utf8ByUtf32Number[utf32Start.number] = utf8Start;

    while (worklist.Count > 0)
    {
        // Pop the most recently added state.
        int top = worklist.Count - 1;
        State source = worklist[top];
        worklist.RemoveAt(top);

        State utf8Source = utf8ByUtf32Number[source.number];

        for (int edge = 0; edge < source.numTransitions; edge++)
        {
            Transition transition = source.TransitionsArray[edge];
            State utf32Target = transition.to;
            State utf8Target = utf8ByUtf32Number[utf32Target.number];

            if (utf8Target == null)
            {
                // First time this utf32 state is reached: create its utf8
                // counterpart and schedule it for processing.
                utf8Target = NewUTF8State();
                utf8Target.accept = utf32Target.accept;
                utf8ByUtf32Number[utf32Target.number] = utf8Target;
                worklist.Add(utf32Target);
            }

            ConvertOneEdge(utf8Source, utf8Target, transition.min, transition.max);
        }
    }

    utf8.SetNumberedStates(utf8States, utf8StateCount);
    return utf8;
}
/// <summary>
/// Runs <c>TestOne</c> against automata that each accept a single randomly
/// chosen <c>[startCode, endCode]</c> transition range. Ranges whose two
/// endpoints are both surrogates are discarded and redrawn, so they do not
/// count toward the number of iterations.
/// </summary>
public void TestRandomRanges()
{
    Random r = Random();
    int iterations = AtLeast(10);
    int iterationsPerDfa = AtLeast(100);

    int completed = 0;
    while (completed < iterations)
    {
        int x1 = GetCodeStart(r);
        int x2 = GetCodeStart(r);

        // Order the two draws so that startCode <= endCode.
        int startCode = x1 < x2 ? x1 : x2;
        int endCode = x1 < x2 ? x2 : x1;

        if (IsSurrogate(startCode) && IsSurrogate(endCode))
        {
            // Redraw without consuming an iteration.
            continue;
        }

        // Two-state automaton: initial --[startCode..endCode]--> accepting end.
        var a = new Automaton();
        var end = new State();
        end.Accept = true;
        a.GetInitialState().AddTransition(new Transition(startCode, endCode, end));
        a.IsDeterministic = true;

        TestOne(r, new ByteRunAutomaton(a), startCode, endCode, iterationsPerDfa);
        completed++;
    }
}