/// <summary>
/// Checks that <c>CharsRef.CharAt</c> returns the character at a valid index
/// and throws <see cref="System.IndexOutOfRangeException"/> for an index just
/// below the start (-1) and just past the end (3) of a three-character sequence.
/// </summary>
public virtual void TestCharSequenceCharAt()
{
    CharsRef c = new CharsRef("abc");

    Assert.AreEqual('b', c.CharAt(1));

    try
    {
        c.CharAt(-1);
        // Reaching this line means no exception was thrown for a negative index.
        Assert.Fail();
    }
    catch (System.IndexOutOfRangeException)
    {
        // expected exception
    }

    try
    {
        c.CharAt(3);
        // Reaching this line means no exception was thrown for index == length.
        Assert.Fail();
    }
    catch (System.IndexOutOfRangeException)
    {
        // expected exception
    }
}
/// <summary>
/// Encodes <paramref name="length"/> UTF-16 code units read from
/// <paramref name="source"/> via <c>CharAt</c>, starting at index
/// <paramref name="offset"/>, as UTF-8 bytes written into
/// <paramref name="result"/>.
/// <para>
/// On return <c>result.Offset</c> is 0 and <c>result.Length</c> is the number
/// of bytes written. If <c>result.Bytes</c> is shorter than
/// <c>length * 4</c>, it is replaced by a newly allocated array of that size.
/// </para>
/// <para>
/// A high surrogate that is not followed (inside the requested range) by a
/// low surrogate, and any low surrogate that appears on its own, is encoded
/// as the three bytes EF BF BD (U+FFFD, the replacement character).
/// </para>
/// </summary>
/// <param name="source">Character sequence to read from.</param>
/// <param name="offset">Index of the first code unit to encode.</param>
/// <param name="length">Number of code units to encode.</param>
/// <param name="result">Receives the encoded bytes, offset and length.</param>
// TODO: broken if incoming result.offset != 0
public static void UTF16toUTF8(CharsRef source, int offset, int length, BytesRef result)
{
    var buffer = result.Bytes;

    // Pre-allocate for the worst case assumed here: four bytes per code unit.
    int capacity = length * 4;
    if (buffer.Length < capacity)
    {
        buffer = new byte[capacity];
        result.Bytes = buffer;
    }
    result.Offset = 0;

    int written = 0;
    int pos = offset;
    int limit = offset + length;

    while (pos < limit)
    {
        int unit = (int)source.CharAt(pos);
        pos++;

        // One byte: 0xxxxxxx
        if (unit < 0x80)
        {
            buffer[written] = (byte)unit;
            written += 1;
            continue;
        }

        // Two bytes: 110xxxxx 10xxxxxx
        if (unit < 0x800)
        {
            buffer[written] = (byte)(0xC0 | (unit >> 6));
            buffer[written + 1] = (byte)(0x80 | (unit & 0x3F));
            written += 2;
            continue;
        }

        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx (anything outside the surrogate range)
        if (unit < 0xD800 || unit > 0xDFFF)
        {
            buffer[written] = (byte)(0xE0 | (unit >> 12));
            buffer[written + 1] = (byte)(0x80 | ((unit >> 6) & 0x3F));
            buffer[written + 2] = (byte)(0x80 | (unit & 0x3F));
            written += 3;
            continue;
        }

        // From here on, unit is in the surrogate range D800..DFFF.
        // Only a high surrogate (below DC00) with a following unit can start a pair.
        if (unit < 0xDC00 && pos < limit)
        {
            int next = (int)source.CharAt(pos);
            bool nextIsLowSurrogate = next >= 0xDC00 && next <= 0xDFFF;
            if (nextIsLowSurrogate)
            {
                // Combine the pair into one code point and consume the low surrogate.
                int codePoint = (unit << 10) + next + SURROGATE_OFFSET;
                pos++;

                // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                buffer[written] = (byte)(0xF0 | (codePoint >> 18));
                buffer[written + 1] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                buffer[written + 2] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                buffer[written + 3] = (byte)(0x80 | (codePoint & 0x3F));
                written += 4;
                continue;
            }
        }

        // Unpaired high surrogate or out-of-order low surrogate:
        // emit the substitution character instead.
        buffer[written] = 0xEF;
        buffer[written + 1] = 0xBF;
        buffer[written + 2] = 0xBD;
        written += 3;
    }

    result.Length = written;
}