Ejemplo n.º 1
0
 /// <summary>
 /// Creates a named Unicode block covering a range of characters
 /// </summary>
 /// <param name="name">Name given to the block</param>
 /// <param name="begin">First character of the block (included)</param>
 /// <param name="end">Last character of the block (included)</param>
 public UnicodeBlock(string name, int begin, int end)
 {
     // Build the character range first, then record both pieces of state
     UnicodeSpan range = new UnicodeSpan(begin, end);
     span = range;
     this.name = name;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Adds a unicode character span to an existing NFA automaton
        /// </summary>
        /// <remarks>
        /// The span is matched on its UTF-16 encoding. Characters encoded on a single
        /// UTF-16 code unit go directly from the entry state to the exit state.
        /// Characters encoded as a surrogate pair go through an intermediate state:
        /// the first (high) surrogate leads to the intermediate state and the second
        /// (low) surrogate, in 0xDC00-0xDFFF, leads to the exit state.
        /// NOTE(review): this assumes GetUTF16() yields one code unit for plane 0
        /// characters and two (high, low) otherwise, and that span.IsPlane0 means
        /// both bounds are in plane 0 - confirm against UnicodeSpan / the code point type.
        /// </remarks>
        /// <param name="automata">The target NFA</param>
        /// <param name="span">The unicode span to add</param>
        private static void AddUnicodeSpanToNFA(NFA automata, UnicodeSpan span)
        {
            char[] b = span.Begin.GetUTF16();
            char[] e = span.End.GetUTF16();

            if (span.IsPlane0)
            {
                // this span is entirely in plane 0
                automata.StateEntry.AddTransition(new CharSpan(b[0], e[0]), automata.StateExit);
            }
            else if (span.Begin.IsPlane0)
            {
                // this span has only a part in plane 0
                // plane 0 part: from the beginning up to 0xFFFF, skipping the surrogate code units
                if (b[0] < 0xD800)
                {
                    automata.StateEntry.AddTransition(new CharSpan(b[0], (char)0xD7FF), automata.StateExit);
                    automata.StateEntry.AddTransition(new CharSpan((char)0xE000, (char)0xFFFF), automata.StateExit);
                }
                else
                {
                    // presumably b[0] is above the surrogate range here, so a single range is enough
                    automata.StateEntry.AddTransition(new CharSpan(b[0], (char)0xFFFF), automata.StateExit);
                }
                // supplementary part: every surrogate pair from (0xD800, 0xDC00) up to (e[0], e[1])
                if (e[0] > 0xD800)
                {
                    // first surrogates strictly below the end's one accept any second surrogate;
                    // bounding them by e[1] would wrongly exclude characters inside the span
                    NFAState full = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan((char)0xD800, (char)(e[0] - 1)), full);
                    full.AddTransition(new CharSpan((char)0xDC00, (char)0xDFFF), automata.StateExit);
                }
                // only the end's own first surrogate is bounded by the end's second surrogate
                NFAState last = automata.AddNewState();
                automata.StateEntry.AddTransition(new CharSpan(e[0], e[0]), last);
                last.AddTransition(new CharSpan((char)0xDC00, e[1]), automata.StateExit);
            }
            else
            {
                // this span has no part in plane 0
                if (b[0] == e[0])
                {
                    // same first surrogate
                    NFAState intermediate = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(b[0], b[0]), intermediate);
                    intermediate.AddTransition(new CharSpan(b[1], e[1]), automata.StateExit);
                }
                else if (e[0] == b[0] + 1)
                {
                    // the first surrogates are consecutive encodings
                    // build lower half
                    NFAState i1 = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(b[0], b[0]), i1);
                    i1.AddTransition(new CharSpan(b[1], (char)0xDFFF), automata.StateExit);
                    // build upper half
                    NFAState i2 = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(e[0], e[0]), i2);
                    i2.AddTransition(new CharSpan((char)0xDC00, e[1]), automata.StateExit);
                }
                else
                {
                    // there is at least one surrogate value between the first surrogates of begin and end
                    // build lower part
                    NFAState ia = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(b[0], b[0]), ia);
                    ia.AddTransition(new CharSpan(b[1], (char)0xDFFF), automata.StateExit);
                    // build intermediate part
                    NFAState im = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan((char)(b[0] + 1), (char)(e[0] - 1)), im);
                    im.AddTransition(new CharSpan((char)0xDC00, (char)0xDFFF), automata.StateExit);
                    // build upper part
                    NFAState iz = automata.AddNewState();
                    automata.StateEntry.AddTransition(new CharSpan(e[0], e[0]), iz);
                    iz.AddTransition(new CharSpan((char)0xDC00, e[1]), automata.StateExit);
                }
            }
        }