/// <summary>
/// Checks the state of an <c>IntsRef</c> created with the parameterless constructor:
/// its array compares equal to <c>IntsRef.EMPTY_INTS</c> and both offset and length are zero.
/// </summary>
public virtual void TestEmpty()
{
    IntsRef emptyRef = new IntsRef();

    // A default-constructed reference is expected to expose the shared empty array.
    Assert.AreEqual(IntsRef.EMPTY_INTS, emptyRef.Ints);

    // ...and to describe an empty slice starting at index 0.
    Assert.AreEqual(0, emptyRef.Offset);
    Assert.AreEqual(0, emptyRef.Length);
}
/// <summary>
/// Decodes the UTF-8 bytes referenced by <paramref name="utf8"/> into UTF-32 code points
/// and stores them in <paramref name="utf32"/>.
/// <para>
/// This method assumes valid UTF-8 input. It <strong>does not perform</strong> full UTF-8
/// validation: only the first (head) byte of each code point is checked. For multi-byte
/// sequences, the bytes after the head are consumed without being inspected.
/// </para>
/// </summary>
/// <param name="utf8">
/// Source bytes. The range <c>[Offset, Offset + Length)</c> of <c>Bytes</c> is decoded.
/// </param>
/// <param name="utf32">
/// Destination. Its <c>Ints</c> array is replaced when it is <c>null</c> or shorter than
/// <c>utf8.Length</c>; code points are always written starting at index 0. On successful
/// return <c>Offset</c> is 0 and <c>Length</c> is the number of decoded code points.
/// </param>
/// <exception cref="System.ArgumentException">
/// If an invalid code point header byte occurs, or the content is prematurely truncated
/// (a multi-byte sequence would extend past the end of <paramref name="utf8"/>).
/// </exception>
public static void UTF8toUTF32(BytesRef utf8, IntsRef utf32)
{
    // TODO (carried over from the original): broken if incoming result.offset != 0
    // Pre-allocate for the worst case: every input byte is its own code point.
    // TODO: ints cannot be null, should be an assert
    if (utf32.Ints == null || utf32.Ints.Length < utf8.Length)
    {
        utf32.Ints = new int[utf8.Length];
    }

    int utf32Count = 0;
    int utf8Upto = utf8.Offset;
    int[] ints = utf32.Ints;
    var bytes = utf8.Bytes;
    int utf8Limit = utf8.Offset + utf8.Length;

    while (utf8Upto < utf8Limit)
    {
        // The head byte selects the sequence length via the Utf8CodeLength lookup table.
        int numBytes = Utf8CodeLength[bytes[utf8Upto] & 0xFF];
        int v = 0;
        switch (numBytes)
        {
            case 1:
                // Single-byte sequence: the byte value is the code point.
                ints[utf32Count++] = bytes[utf8Upto++];
                continue;

            case 2:
                // 5 useful bits
                v = bytes[utf8Upto++] & 31;
                break;

            case 3:
                // 4 useful bits
                v = bytes[utf8Upto++] & 15;
                break;

            case 4:
                // 3 useful bits
                v = bytes[utf8Upto++] & 7;
                break;

            default:
                throw new System.ArgumentException("invalid utf8");
        }

        // utf8Upto already points past the head byte, so this is the index just after
        // the last continuation byte of the current sequence.
        int limit = utf8Upto + numBytes - 1;

        // Reject a sequence that would run past the end of the input instead of reading
        // bytes outside the referenced range (or past the end of the array).
        if (limit > utf8Limit)
        {
            throw new System.ArgumentException("invalid utf8: truncated sequence");
        }

        // Each continuation byte contributes its low 6 bits.
        while (utf8Upto < limit)
        {
            v = v << 6 | bytes[utf8Upto++] & 63;
        }

        ints[utf32Count++] = v;
    }

    utf32.Offset = 0;
    utf32.Length = utf32Count;
}
/// <summary>
/// Checks an <c>IntsRef</c> built over an existing array: the full-range reference reports
/// the array, offset and length it was given, and a sub-range reference compares equal to a
/// fresh reference holding the same values while differing from the full-range one.
/// </summary>
public virtual void TestFromInts()
{
    int[] backing = new int[] { 1, 2, 3, 4 };

    // Reference covering the whole backing array.
    IntsRef whole = new IntsRef(backing, 0, 4);
    Assert.AreEqual(backing, whole.Ints);
    Assert.AreEqual(0, whole.Offset);
    Assert.AreEqual(4, whole.Length);

    // Reference covering the last three elements of the same array.
    IntsRef tail = new IntsRef(backing, 1, 3);
    IntsRef expectedTail = new IntsRef(new int[] { 2, 3, 4 }, 0, 3);
    Assert.AreEqual(expectedTail, tail);

    // The two references describe different content, so they must not be equal.
    Assert.IsFalse(whole.Equals(tail));
}
/// <summary>
/// Randomized check of <c>UnicodeUtil.UTF8toUTF32</c>: each random string is encoded with
/// <c>UnicodeUtil.UTF16toUTF8</c>, decoded with <c>UnicodeUtil.UTF8toUTF32</c>, and the result
/// is compared against the code points obtained from <c>Character.CodePointAt</c>.
/// On a mismatch, diagnostic output is written to the console and the test fails.
/// </summary>
public virtual void TestUTF8toUTF32()
{
    BytesRef encoded = new BytesRef(20);
    IntsRef decoded = new IntsRef(20);

    // NOTE(review): fixed capacity of 20 assumes the random strings never contain more
    // than 20 code points - confirm against TestUtil.RandomUnicodeString.
    int[] expected = new int[20];

    int iterations = AtLeast(50000);
    for (int iter = 0; iter < iterations; iter++)
    {
        string text = TestUtil.RandomUnicodeString(Random());

        // UTF-16 -> UTF-8 -> UTF-32 round trip under test.
        UnicodeUtil.UTF16toUTF8(text.ToCharArray(), 0, text.Length, encoded);
        UnicodeUtil.UTF8toUTF32(encoded, decoded);

        // Build the reference code point sequence directly from the string.
        int expectedCount = 0;
        for (int charPos = 0; charPos < text.Length; )
        {
            int codePoint = Character.CodePointAt(text, charPos);
            expected[expectedCount++] = codePoint;
            charPos += Character.CharCount(codePoint);
        }

        if (ArrayUtil.Equals(expected, 0, decoded.Ints, decoded.Offset, expectedCount))
        {
            continue;
        }

        // Mismatch: dump the input chars, then the decoded vs. expected code points.
        Console.WriteLine("FAILED");
        for (int k = 0; k < text.Length; k++)
        {
            Console.WriteLine(" char[" + k + "]=" + ((int)text[k]).ToString("x"));
        }
        Console.WriteLine();

        Assert.AreEqual(expectedCount, decoded.Length);
        for (int k = 0; k < expectedCount; k++)
        {
            Console.WriteLine(" " + decoded.Ints[k].ToString("x") + " vs " + expected[k].ToString("x"));
        }
        Assert.Fail("mismatch");
    }
}