/// <summary>
/// Checks that the terms <c>RussianAnalyzer</c> produces for the UTF-8 input file
/// <c>ru\testUTF8.txt</c> match, token by token, the terms that
/// <c>RussianLetterTokenizer</c> reads from the reference file <c>ru\resUTF8.txt</c>.
/// </summary>
/// <remarks>
/// The loop is driven by the analyzer stream: it stops as soon as that stream has no
/// more tokens, so tokens remaining in the reference stream after that point are not
/// examined. If the reference stream runs out first, the expected value becomes
/// <c>null</c> and the comparison against the analyzer's term is what reports it.
/// </remarks>
public void TestUnicode()
{
    RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);

    // inWords and sampleUnicode are not declared in this method (presumably fixture
    // fields - confirm against the enclosing class); the using statements close both
    // readers when the block exits, including when an assertion throws.
    using (inWords = new StreamReader(@"ru\testUTF8.txt", Encoding.UTF8))
    using (sampleUnicode = new StreamReader(@"ru\resUTF8.txt", Encoding.UTF8))
    {
        TokenStream _in = ra.TokenStream("all", inWords);
        RussianLetterTokenizer sample = new RussianLetterTokenizer(sampleUnicode);

        ITermAttribute text = _in.GetAttribute<ITermAttribute>();
        ITermAttribute sampleText = sample.GetAttribute<ITermAttribute>();

        while (_in.IncrementToken())
        {
            bool hasSampleToken = sample.IncrementToken();

            // Expected value (reference file) goes first, actual value (analyzer output)
            // second, so a failure message labels the two sides correctly.
            Assert.AreEqual(hasSampleToken ? sampleText.Term : null, text.Term, "Unicode");
        }
    }
}