/// <summary>
/// Creates a Unicode block with the given name, covering the characters
/// from <paramref name="begin"/> to <paramref name="end"/>
/// </summary>
/// <param name="name">The name of the block</param>
/// <param name="begin">The first character of the block (inclusive)</param>
/// <param name="end">The last character of the block (inclusive)</param>
public UnicodeBlock(string name, int begin, int end)
{
    // The covered range is stored as a single span built from both bounds
    this.span = new UnicodeSpan(begin, end);
    this.name = name;
}
/// <summary>
/// Adds a unicode character span to an existing NFA automaton
/// </summary>
/// <param name="automata">The target NFA</param>
/// <param name="span">The unicode span to add</param>
/// <remarks>
/// The span's bounds are read through <c>GetUTF16()</c>; index 0 is used as the
/// single character (plane 0) or as the first element of a surrogate pair, and
/// index 1 as the second element of the pair.
/// Characters matched with a single char go directly from the entry state to the
/// exit state; characters matched with a surrogate pair go through one new
/// intermediate state per group of first surrogates sharing the same range of
/// second surrogates.
/// </remarks>
private static void AddUnicodeSpanToNFA(NFA automata, UnicodeSpan span)
{
    // Boundaries of the UTF-16 surrogate areas within plane 0
    const char FirstHighSurrogate = (char)0xD800;
    const char LastBeforeSurrogates = (char)0xD7FF;
    const char FirstLowSurrogate = (char)0xDC00;
    const char LastLowSurrogate = (char)0xDFFF;
    const char FirstAfterSurrogates = (char)0xE000;
    const char LastOfPlane0 = (char)0xFFFF;

    char[] b = span.Begin.GetUTF16();
    char[] e = span.End.GetUTF16();

    if (span.IsPlane0)
    {
        // this span is entirely in plane 0
        automata.StateEntry.AddTransition(new CharSpan(b[0], e[0]), automata.StateExit);
    }
    else if (span.Begin.IsPlane0)
    {
        // this span has only a part in plane 0
        if (b[0] < FirstHighSurrogate)
        {
            // skip over the surrogate area, it is handled by the pairs below
            automata.StateEntry.AddTransition(new CharSpan(b[0], LastBeforeSurrogates), automata.StateExit);
            automata.StateEntry.AddTransition(new CharSpan(FirstAfterSurrogates, LastOfPlane0), automata.StateExit);
        }
        else
        {
            automata.StateEntry.AddTransition(new CharSpan(b[0], LastOfPlane0), automata.StateExit);
        }

        // remaining part: from the first surrogate pair up to the end of the span
        if (e[0] > FirstHighSurrogate && e[1] < LastLowSurrogate)
        {
            // Only the last first-surrogate is limited by e[1]; every
            // first-surrogate strictly below it must accept the complete range
            // of second surrogates, otherwise characters inside the span are rejected.
            AddSurrogatePairsToNFA(automata, FirstHighSurrogate, (char)(e[0] - 1), FirstLowSurrogate, LastLowSurrogate);
            AddSurrogatePairsToNFA(automata, e[0], e[0], FirstLowSurrogate, e[1]);
        }
        else
        {
            // a single first-surrogate, or the full range of second surrogates:
            // one intermediate state is exact
            AddSurrogatePairsToNFA(automata, FirstHighSurrogate, e[0], FirstLowSurrogate, e[1]);
        }
    }
    else
    {
        // this span has no part in plane 0
        if (b[0] == e[0])
        {
            // same first surrogate
            AddSurrogatePairsToNFA(automata, b[0], b[0], b[1], e[1]);
        }
        else if (e[0] == b[0] + 1)
        {
            // the first surrogates are consecutive encodings
            // build lower half
            AddSurrogatePairsToNFA(automata, b[0], b[0], b[1], LastLowSurrogate);
            // build upper half
            AddSurrogatePairsToNFA(automata, e[0], e[0], FirstLowSurrogate, e[1]);
        }
        else
        {
            // there is at least one surrogate value between the first surrogates of begin and end
            // build lower part
            AddSurrogatePairsToNFA(automata, b[0], b[0], b[1], LastLowSurrogate);
            // build intermediate part
            AddSurrogatePairsToNFA(automata, (char)(b[0] + 1), (char)(e[0] - 1), FirstLowSurrogate, LastLowSurrogate);
            // build upper part
            AddSurrogatePairsToNFA(automata, e[0], e[0], FirstLowSurrogate, e[1]);
        }
    }
}

/// <summary>
/// Adds to an NFA a two-step path through a new intermediate state: a first char
/// within [firstBegin, firstEnd] followed by a second char within [secondBegin, secondEnd]
/// </summary>
/// <param name="automata">The target NFA</param>
/// <param name="firstBegin">Lower bound (included) of the first char of the pair</param>
/// <param name="firstEnd">Upper bound (included) of the first char of the pair</param>
/// <param name="secondBegin">Lower bound (included) of the second char of the pair</param>
/// <param name="secondEnd">Upper bound (included) of the second char of the pair</param>
private static void AddSurrogatePairsToNFA(NFA automata, char firstBegin, char firstEnd, char secondBegin, char secondEnd)
{
    NFAState intermediate = automata.AddNewState();
    automata.StateEntry.AddTransition(new CharSpan(firstBegin, firstEnd), intermediate);
    intermediate.AddTransition(new CharSpan(secondBegin, secondEnd), automata.StateExit);
}