/// <summary>
/// Passes the single UTF-16 code unit U+FDFA through an <c>nfkc_cf</c> normalizing
/// char filter, tokenizes the result on whitespace, and asserts that four
/// terms come out whose reported offsets all fall inside the one-character input.
/// </summary>
/// <remarks>
/// The roles of the three arrays (terms, then two offset arrays) and the trailing
/// length follow the AssertTokenStreamContents helper, which is defined outside
/// this view; presumably they are start offsets, end offsets and final offset.
/// </remarks>
public void TestMassiveLigature()
{
    String ligature = "\uFDFA";

    Normalizer2 nfkcCaseFold = Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose);
    CharFilter normalized = new ICUNormalizer2CharFilter(new StringReader(ligature), nfkcCaseFold);
    Tokenizer tokens = new MockTokenizer(normalized, MockTokenizer.WHITESPACE, false);

    String[] expectedTerms = { "صلى", "الله", "عليه", "وسلم" };
    // Every term maps back onto the single input character, so only the
    // last one is expected to end at offset 1.
    int[] expectedFirstOffsets = { 0, 0, 0, 0 };
    int[] expectedSecondOffsets = { 0, 0, 0, 1 };

    AssertTokenStreamContents(
        tokens,
        expectedTerms,
        expectedFirstOffsets,
        expectedSecondOffsets,
        ligature.Length);
}
/// <summary>
/// Runs a whitespace-separated string of compatibility characters through an
/// <c>nfkc</c> normalizing char filter and asserts the normalized terms together
/// with offsets that point back into the un-normalized input.
/// </summary>
/// <remarks>
/// The offset arrays are passed to AssertTokenStreamContents (defined outside this
/// view); presumably they are start offsets, end offsets and the final offset.
/// </remarks>
public void TestTokenStream()
{
    // Input tokens, in order: '℃', '№', '㈱', '㌘', then three base characters each
    // followed by a voiced sound mark (written '<<' in the original note):
    // 'サ'+mark, 'ソ'+mark, '㌰'+mark.
    //
    // The base+mark pairs are spelled with \u escapes on purpose: the expected
    // offsets below (8-10, 11-13, 14-16) require each of those tokens to occupy
    // TWO UTF-16 code units in the input. A literal glyph such as "ザ" is visually
    // indistinguishable from the one-unit precomposed form and would silently break
    // the offsets if the file were ever saved in a composed normalization form.
    String input = "℃ № ㈱ ㌘ \u30B5\u3099 \u30BD\u3099 ㌰\u3099";

    CharFilter reader = new ICUNormalizer2CharFilter(
        new StringReader(input),
        Normalizer2.GetInstance(null, "nfkc", Normalizer2Mode.Compose));

    Tokenizer tokenStream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

    AssertTokenStreamContents(
        tokenStream,
        new String[] { "°C", "No", "(株)", "グラム", "ザ", "ゾ", "ピゴ" },
        new int[] { 0, 2, 4, 6, 8, 11, 14 },
        new int[] { 1, 3, 5, 7, 10, 13, 16 },
        input.Length);
}
/// <summary>
/// Runs an unseparated string of compatibility characters through an
/// <c>nfkc_cf</c> normalizing char filter, splits the result with an
/// <c>NGramTokenizer</c> configured as (1, 1), and asserts each resulting term
/// together with offsets that point back into the un-normalized input.
/// </summary>
/// <remarks>
/// The offset arrays are passed to AssertTokenStreamContents (defined outside this
/// view); presumably they are start offsets, end offsets and the final offset.
/// </remarks>
public void TestTokenStream2()
{
    // Input characters, in order: '㌰', voiced mark, '5', '℃', '№', '㈱', '㌘',
    // 'サ', voiced mark, 'ソ', voiced mark ('<<' in the original note stood for
    // the voiced sound mark).
    //
    // The base+mark pairs are spelled with \u escapes on purpose: the expected
    // offsets below (0-1/1-2 for the first pair, 7-9 and 9-11 for the last two)
    // require every mark to be its own UTF-16 code unit. The literal is split into
    // pieces so that "\u3099" is not visually run together with the following '5'.
    String input = "㌰\u3099" + "5℃№㈱㌘" + "\u30B5\u3099" + "\u30BD\u3099";

    CharFilter reader = new ICUNormalizer2CharFilter(
        new StringReader(input),
        Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose));

    Tokenizer tokenStream = new NGramTokenizer(TEST_VERSION_CURRENT, reader, 1, 1);

    AssertTokenStreamContents(
        tokenStream,
        new String[] { "ピ", "ゴ", "5", "°", "c", "n", "o", "(", "株", ")", "グ", "ラ", "ム", "ザ", "ゾ" },
        new int[] { 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9 },
        new int[] { 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9, 11 },
        input.Length);
}
/// <summary>
/// Reads a mixed-script string through an <c>nfkc_cf</c> normalizing char filter in
/// chunks of at most 10 chars and checks two things:
/// after every chunk, the text read so far equals the normalization of the input
/// prefix that ends at the filter's corrected offset; and, at end of stream, the
/// complete output equals the normalization of the whole input.
/// </summary>
public void TestNormalization()
{
    String input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
    Normalizer2 normalizer = Normalizer2.GetInstance(null, "nfkc_cf", Normalizer2Mode.Compose);
    String expectedOutput = normalizer.Normalize(input);

    CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input), normalizer);
    // Deliberately small so the filter has to hand its output over in several pieces.
    char[] tempBuff = new char[10];
    StringBuilder output = new StringBuilder();

    while (true)
    {
        // NOTE(review): assumes CharFilter exposes the standard .NET
        // TextReader.Read(char[], int, int) overload - confirm against its declaration.
        int length = reader.Read(tempBuff, 0, tempBuff.Length);

        // .NET readers signal end of stream with 0, Java-style ports with -1;
        // treat both as "done" so the loop cannot spin forever on an exhausted reader.
        if (length <= 0)
        {
            break;
        }

        output.Append(tempBuff, 0, length);

        // Substring starts at 0, so the corrected end offset is also the prefix length.
        int consumedInputLength = reader.CorrectOffset(output.Length);
        assertEquals(output.ToString(), normalizer.Normalize(input.Substring(0, consumedInputLength)));
    }

    assertEquals(expectedOutput, output.ToString());
}