Пример #1
0
        /// <summary>
        /// Checks that a default-constructed <c>IntsRef</c> reports
        /// <c>IntsRef.EMPTY_INTS</c> as its backing array, with offset 0 and length 0.
        /// </summary>
        public virtual void TestEmpty()
        {
            var emptyRef = new IntsRef();

            // Backing array first, then the window (offset/length) over it.
            Assert.AreEqual(IntsRef.EMPTY_INTS, emptyRef.Ints);
            Assert.AreEqual(0, emptyRef.Offset);
            Assert.AreEqual(0, emptyRef.Length);
        }
Пример #2
0
        /// <summary>
        /// Decodes the UTF-8 bytes in <paramref name="utf8"/> (from <c>utf8.Offset</c>, for
        /// <c>utf8.Length</c> bytes) into code points written to <c>utf32.Ints</c> starting at
        /// index 0. On return <c>utf32.Offset</c> is 0 and <c>utf32.Length</c> is the number of
        /// code points decoded.
        /// <para>
        /// This method assumes valid UTF8 input. It <strong>does not perform</strong> full UTF8
        /// validation: only the first byte of each codepoint is checked (for multi-byte
        /// sequences the bytes after the head are consumed without being validated).
        /// </para>
        /// <para>
        /// If <c>utf32.Ints</c> is <c>null</c> or shorter than <c>utf8.Length</c> it is replaced
        /// by a new array of <c>utf8.Length</c> elements (worst case: one code point per byte).
        /// </para>
        /// </summary>
        /// <param name="utf8"> Source bytes to decode. </param>
        /// <param name="utf32"> Destination; its array, offset and length are overwritten. </param>
        /// <exception cref="System.ArgumentException"> If invalid codepoint header byte occurs or the
        ///    content is prematurely truncated. </exception>
        public static void UTF8toUTF32(BytesRef utf8, IntsRef utf32)
        {
            // Pre-allocate for the worst case (every byte is a single-byte code point).
            // TODO: ints cannot be null, should be an assert
            if (utf32.Ints == null || utf32.Ints.Length < utf8.Length)
            {
                utf32.Ints = new int[utf8.Length];
            }
            int utf32Count = 0;
            int utf8Upto   = utf8.Offset;

            int[] ints      = utf32.Ints;
            var   bytes     = utf8.Bytes;
            int   utf8Limit = utf8.Offset + utf8.Length;

            while (utf8Upto < utf8Limit)
            {
                // The lead byte alone determines how many bytes the sequence occupies.
                int numBytes = Utf8CodeLength[bytes[utf8Upto] & 0xFF];
                int v        = 0;
                switch (numBytes)
                {
                case 1:
                    ints[utf32Count++] = bytes[utf8Upto++];
                    continue;

                case 2:
                    // 5 useful bits
                    v = bytes[utf8Upto++] & 31;
                    break;

                case 3:
                    // 4 useful bits
                    v = bytes[utf8Upto++] & 15;
                    break;

                case 4:
                    // 3 useful bits
                    v = bytes[utf8Upto++] & 7;
                    break;

                default:
                    throw new System.ArgumentException("invalid utf8");
                }

                // The lead byte has been consumed; numBytes - 1 continuation bytes remain.
                int limit = utf8Upto + numBytes - 1;

                // Reject a sequence whose continuation bytes would lie beyond the referenced
                // slice instead of reading stale buffer content or running off the array.
                if (limit > utf8Limit)
                {
                    throw new System.ArgumentException("invalid utf8: truncated multi-byte sequence");
                }

                while (utf8Upto < limit)
                {
                    // Each continuation byte contributes its low 6 bits.
                    v = v << 6 | bytes[utf8Upto++] & 63;
                }
                ints[utf32Count++] = v;
            }

            utf32.Offset = 0;
            utf32.Length = utf32Count;
        }
Пример #3
0
        /// <summary>
        /// Checks that an <c>IntsRef</c> built over an existing array exposes that array with
        /// the requested offset and length, and that equality is based on the referenced
        /// slice rather than on the whole backing array.
        /// </summary>
        public virtual void TestFromInts()
        {
            int[] backing = { 1, 2, 3, 4 };

            // A reference covering the whole array.
            var whole = new IntsRef(backing, 0, 4);
            Assert.AreEqual(backing, whole.Ints);
            Assert.AreEqual(0, whole.Offset);
            Assert.AreEqual(4, whole.Length);

            // A reference covering the tail {2, 3, 4} must equal a fresh ref with the same content.
            var tail         = new IntsRef(backing, 1, 3);
            var expectedTail = new IntsRef(new int[] { 2, 3, 4 }, 0, 3);
            Assert.AreEqual(expectedTail, tail);

            // Same backing array but different slices: not equal.
            Assert.IsFalse(whole.Equals(tail));
        }
Пример #4
0
        /// <summary>
        /// Round-trip test: encodes random Unicode strings to UTF-8 with
        /// <c>UnicodeUtil.UTF16toUTF8</c>, decodes them with <c>UnicodeUtil.UTF8toUTF32</c>, and
        /// compares the result against the code points read directly from the string.
        /// Both the number of decoded code points and their values are verified; on a
        /// mismatch the input chars and both code point sequences are dumped before failing.
        /// </summary>
        public virtual void TestUTF8toUTF32()
        {
            BytesRef utf8  = new BytesRef(20);
            IntsRef  utf32 = new IntsRef(20);

            // NOTE(review): assumes RandomUnicodeString never yields more than 20 code points - confirm.
            int[] codePoints = new int[20];
            int   num        = AtLeast(50000);

            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                UnicodeUtil.UTF16toUTF8(s.ToCharArray(), 0, s.Length, utf8);
                UnicodeUtil.UTF8toUTF32(utf8, utf32);

                // Build the expected code point sequence straight from the UTF-16 string.
                int charUpto = 0;
                int intUpto  = 0;

                while (charUpto < s.Length)
                {
                    int cp = Character.CodePointAt(s, charUpto);
                    codePoints[intUpto++] = cp;
                    charUpto += Character.CharCount(cp);
                }

                // Check the count as well as the content: comparing only the first intUpto
                // entries would let a decoder that emits extra trailing code points pass.
                if (intUpto != utf32.Length
                    || !ArrayUtil.Equals(codePoints, 0, utf32.Ints, utf32.Offset, intUpto))
                {
                    Console.WriteLine("FAILED");
                    for (int j = 0; j < s.Length; j++)
                    {
                        Console.WriteLine("  char[" + j + "]=" + ((int)s[j]).ToString("x"));
                    }
                    Console.WriteLine();
                    Assert.AreEqual(intUpto, utf32.Length);
                    for (int j = 0; j < intUpto; j++)
                    {
                        // Honor the ref's offset so the dump shows the same ints that were compared.
                        Console.WriteLine("  " + utf32.Ints[utf32.Offset + j].ToString("x") + " vs " + codePoints[j].ToString("x"));
                    }
                    Assert.Fail("mismatch");
                }
            }
        }