Esempio n. 1
0
        /// <summary>
        /// Checks that <c>CharAt</c> returns the character at a valid index of a
        /// <c>CharsRef</c> built from "abc", and that indices just outside the valid
        /// range (-1 and 3) raise <see cref="System.IndexOutOfRangeException"/>.
        /// </summary>
        public virtual void TestCharSequenceCharAt()
        {
            CharsRef chars = new CharsRef("abc");

            Assert.AreEqual('b', chars.CharAt(1));

            // One index below the valid range and one at the length of the text.
            foreach (int badIndex in new[] { -1, 3 })
            {
                try
                {
                    chars.CharAt(badIndex);
                    // Reaching this line means no exception was thrown for an invalid index.
                    Assert.Fail();
                }
                catch (System.IndexOutOfRangeException)
                {
                    // expected exception
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Encodes <paramref name="length"/> UTF-16 code units read from
        /// <paramref name="source"/> (via <c>CharAt</c>), starting at
        /// <paramref name="offset"/>, as UTF-8 bytes written into
        /// <paramref name="result"/>.
        /// <para>
        /// If the byte array currently held by <paramref name="result"/> is shorter than
        /// <c>length * 4</c>, it is replaced by a newly allocated array of that size.
        /// On return, <c>result.Offset</c> is 0 and <c>result.Length</c> is the number
        /// of bytes written. A surrogate that is not part of a valid high/low pair is
        /// emitted as the three bytes EF BF BD.
        /// </para>
        /// </summary>
        /// <param name="source">Characters to encode.</param>
        /// <param name="offset">Index of the first character to encode.</param>
        /// <param name="length">Number of characters to encode.</param>
        /// <param name="result">Receives the encoded bytes, starting at index 0.</param>
        // TODO: broken if incoming result.offset != 0
        public static void UTF16toUTF8(CharsRef source, int offset, int length, BytesRef result)
        {
            int limit = offset + length;

            // Size the destination for the worst case of 4 output bytes per input char.
            int capacity = length * 4;
            var buffer = result.Bytes;
            if (buffer.Length < capacity)
            {
                buffer = new byte[capacity];
                result.Bytes = buffer;
            }
            result.Offset = 0;

            int written = 0;
            for (int pos = offset; pos < limit; )
            {
                int unit = (int)source.CharAt(pos);
                pos++;

                // One byte: 0xxxxxxx
                if (unit < 0x80)
                {
                    buffer[written++] = (byte)unit;
                    continue;
                }

                // Two bytes: 110xxxxx 10xxxxxx
                if (unit < 0x800)
                {
                    buffer[written++] = (byte)(0xC0 | (unit >> 6));
                    buffer[written++] = (byte)(0x80 | (unit & 0x3F));
                    continue;
                }

                // Three bytes for anything outside the surrogate range: 1110xxxx 10xxxxxx 10xxxxxx
                if (unit < 0xD800 || unit > 0xDFFF)
                {
                    buffer[written++] = (byte)(0xE0 | (unit >> 12));
                    buffer[written++] = (byte)(0x80 | ((unit >> 6) & 0x3F));
                    buffer[written++] = (byte)(0x80 | (unit & 0x3F));
                    continue;
                }

                // From here on the unit lies in 0xD800..0xDFFF. Only a high surrogate
                // (below 0xDC00) with a following char still inside the range can start a pair.
                if (unit < 0xDC00 && pos < limit)
                {
                    int next = (int)source.CharAt(pos);
                    if (next >= 0xDC00 && next <= 0xDFFF)
                    {
                        // Valid low surrogate: merge the pair into one value and consume
                        // the second unit. SURROGATE_OFFSET is declared outside this method.
                        int codePoint = (unit << 10) + next + SURROGATE_OFFSET;
                        pos++;

                        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                        buffer[written++] = (byte)(0xF0 | (codePoint >> 18));
                        buffer[written++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                        buffer[written++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                        buffer[written++] = (byte)(0x80 | (codePoint & 0x3F));
                        continue;
                    }
                }

                // Unpaired high surrogate or stray low surrogate: write the
                // substitution character instead.
                buffer[written++] = (byte)0xEF;
                buffer[written++] = (byte)0xBF;
                buffer[written++] = (byte)0xBD;
            }

            //assert matches(source, offset, length, out, upto);
            result.Length = written;
        }