コード例 #1
0
        public void TestUnicode()
        {
            RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);

            using (inWords = new StreamReader(@"ru\testUTF8.txt", Encoding.UTF8))
            using (sampleUnicode = new StreamReader(@"ru\resUTF8.txt", Encoding.UTF8))
            {

                TokenStream _in = ra.TokenStream("all", inWords);

                RussianLetterTokenizer sample =
                    new RussianLetterTokenizer(
                        sampleUnicode);

                ITermAttribute text = _in.GetAttribute<ITermAttribute>();
                ITermAttribute sampleText = sample.GetAttribute<ITermAttribute>();

                for (; ; )
                {
                    if (_in.IncrementToken() == false)
                        break;

                    bool nextSampleToken = sample.IncrementToken();
                    Assert.AreEqual(text.Term, nextSampleToken == false ? null : sampleText.Term, "Unicode");
                }
            }
        }
コード例 #2
0
        public void TestDigitsInRussianCharset()
        {
            TextReader reader = new StringReader("text 1000");
            RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
            TokenStream stream = ra.TokenStream("", reader);

            ITermAttribute termText = stream.GetAttribute<ITermAttribute>();
            
            try
            {
                Assert.True(stream.IncrementToken());
                Assert.AreEqual("text", termText.Term);
                Assert.True(stream.IncrementToken());
                Assert.AreEqual("1000", termText.Term, "RussianAnalyzer's tokenizer skips numbers from input text");
                Assert.False(stream.IncrementToken());
            }
            catch (IOException e)
            {
                Assert.Fail("unexpected IOException");
            }
        }