Ejemplo n.º 1
0
        /// <summary>
        /// Checks <c>UnaccentedWordAnalyzer</c> in two steps: first the raw token stream
        /// produced for a mixed-script sample is compared against the expected tokens,
        /// then an index built from the same sample (via <c>CreateIndex</c>) is queried
        /// and the hit count of each query is verified.
        /// </summary>
        public void TestUnaccentedWordAnalyzer()
        {
            // Sample mixes an e-mail address, a dotted number, Turkish letters in both
            // cases, Greek, Cyrillic and a German sharp s. The expected tokens below show
            // what the analyzer should do with it: split on punctuation, lower-case,
            // strip diacritics and expand "ß" to "ss".
            string text = "name.surname@gmail.com 123.456 ğüşıöç%ĞÜŞİÖÇ$ΑΒΓΔΕΖ#АБВГДЕ SSß";

            string[] expectedTokens = new string[] { "name", "surname", "gmail", "com", "123", "456", "gusioc", "gusioc", "αβγδεζ", "абвгде", "ssss" };

            UnaccentedWordAnalyzer analyzer = new UnaccentedWordAnalyzer();
            TokenStream            ts       = analyzer.TokenStream("", new System.IO.StringReader(text));

            int           i             = 0;
            TermAttribute termAttribute = (TermAttribute)ts.GetAttribute(typeof(TermAttribute));

            while (ts.IncrementToken())
            {
                string term = termAttribute.Term();
                System.Diagnostics.Debug.WriteLine(term);

                // Fail with a readable message instead of an index-out-of-range error
                // when the analyzer emits more tokens than expected.
                Assert.IsTrue(i < expectedTokens.Length, "Unexpected extra token: " + term);
                Assert.AreEqual(expectedTokens[i++], term);
            }

            // Without this check a stream that stops early would pass silently.
            Assert.AreEqual(expectedTokens.Length, i, "Analyzer produced fewer tokens than expected");

            QueryParser   p   = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "field", analyzer);
            IndexSearcher src = CreateIndex(text, analyzer);

            try
            {
                // queries[n] must yield expectedHits[n] documents.
                // "name@gmail" expects 0 hits - presumably because a multi-token query is
                // parsed as a phrase and "name"/"gmail" are not adjacent in the sample
                // (TODO confirm against QueryParser behaviour).
                string[] queries      = new string[] { "ĞÜŞıöç", "name", "surname", "NAME.surname", "surname@gmail", "name@gmail", "456", "123.456" };
                int[]    expectedHits = new int[]    { 1,        1,      1,         1,              1,               0,            1,     1 };

                for (int q = 0; q < queries.Length; q++)
                {
                    TopDocs td = src.Search(p.Parse(queries[q]), 10);
                    Assert.AreEqual(expectedHits[q], td.totalHits, "Unexpected hit count for query: " + queries[q]);
                }
            }
            finally
            {
                // Release the searcher even when an assertion above fails.
                src.Close();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Checks <c>UnaccentedWordAnalyzer</c> in two steps: first the raw token stream
        /// produced for a mixed-script sample is compared against the expected tokens,
        /// then an index built from the same sample (via <c>CreateIndex</c>) is queried
        /// and the hit count of each query is verified.
        /// </summary>
        /// <remarks>
        /// This file must be saved in a Unicode encoding (e.g. UTF-8); the literals below
        /// contain Turkish, Greek and Cyrillic characters that a single-byte code page
        /// cannot represent.
        /// </remarks>
        public void TestUnaccentedWordAnalyzer()
        {
            // Sample mixes an e-mail address, a dotted number, Turkish letters in both
            // cases, Greek, Cyrillic and a German sharp s. The expected tokens below show
            // what the analyzer should do with it: split on punctuation, lower-case,
            // strip diacritics and expand "ß" to "ss".
            string text = "name.surname@gmail.com 123.456 ğüşıöç%ĞÜŞİÖÇ$ΑΒΓΔΕΖ#АБВГДЕ SSß";
            string[] expectedTokens = new string[] { "name", "surname", "gmail", "com", "123", "456", "gusioc", "gusioc", "αβγδεζ", "абвгде", "ssss" };

            UnaccentedWordAnalyzer analyzer = new UnaccentedWordAnalyzer();
            TokenStream ts = analyzer.TokenStream("", new System.IO.StringReader(text));

            int i = 0;
            TermAttribute termAttribute = (TermAttribute)ts.GetAttribute(typeof(TermAttribute));
            while (ts.IncrementToken())
            {
                string term = termAttribute.Term();
                System.Diagnostics.Debug.WriteLine(term);

                // Fail with a readable message instead of an index-out-of-range error
                // when the analyzer emits more tokens than expected.
                Assert.IsTrue(i < expectedTokens.Length, "Unexpected extra token: " + term);
                Assert.AreEqual(expectedTokens[i++], term);
            }

            // Without this check a stream that stops early would pass silently.
            Assert.AreEqual(expectedTokens.Length, i, "Analyzer produced fewer tokens than expected");

            QueryParser p = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "field", analyzer);
            IndexSearcher src = CreateIndex(text, analyzer);
            try
            {
                // queries[n] must yield expectedHits[n] documents.
                // "name@gmail" expects 0 hits - presumably because a multi-token query is
                // parsed as a phrase and "name"/"gmail" are not adjacent in the sample
                // (TODO confirm against QueryParser behaviour).
                string[] queries = new string[] { "ĞÜŞıöç", "name", "surname", "NAME.surname", "surname@gmail", "name@gmail", "456", "123.456" };
                int[] expectedHits = new int[] { 1, 1, 1, 1, 1, 0, 1, 1 };

                for (int q = 0; q < queries.Length; q++)
                {
                    TopDocs td = src.Search(p.Parse(queries[q]), 10);
                    Assert.AreEqual(expectedHits[q], td.totalHits, "Unexpected hit count for query: " + queries[q]);
                }
            }
            finally
            {
                // Release the searcher even when an assertion above fails.
                src.Close();
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks <c>UnaccentedWordAnalyzer</c> in two steps: first the raw token stream
        /// produced for a mixed-script sample is compared against the expected tokens,
        /// then an index built from the same sample (via <c>CreateIndex</c>) is queried
        /// and the hit count of each query is verified.
        /// </summary>
        public void TestUnaccentedWordAnalyzer()
        {
            // Sample mixes an e-mail address, a dotted number, Turkish letters in both
            // cases, Greek, Cyrillic and a German sharp s. The expected tokens below show
            // what the analyzer should do with it: split on punctuation, lower-case,
            // strip diacritics and expand "ß" to "ss".
            string text = "name.surname@gmail.com 123.456 ğüşıöç%ĞÜŞİÖÇ$ΑΒΓΔΕΖ#АБВГДЕ SSß";
            string[] expectedTokens = new string[] { "name", "surname", "gmail", "com", "123", "456", "gusioc", "gusioc", "αβγδεζ", "абвгде", "ssss" };

            UnaccentedWordAnalyzer analyzer = new UnaccentedWordAnalyzer();
            TokenStream ts = analyzer.TokenStream("", new System.IO.StringReader(text));

            int i = 0;
            ITermAttribute termAttribute = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
            {
                string term = termAttribute.Term;
                System.Diagnostics.Debug.WriteLine(term);

                // Fail with a readable message instead of an index-out-of-range error
                // when the analyzer emits more tokens than expected.
                Assert.IsTrue(i < expectedTokens.Length, "Unexpected extra token: " + term);
                Assert.AreEqual(expectedTokens[i++], term);
            }

            // Without this check a stream that stops early would pass silently.
            Assert.AreEqual(expectedTokens.Length, i, "Analyzer produced fewer tokens than expected");

            QueryParser p = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "field", analyzer);

            // Dispose the searcher even when an assertion inside the block fails.
            using (IndexSearcher src = CreateIndex(text, analyzer))
            {
                // queries[n] must yield expectedHits[n] documents.
                // "name@gmail" expects 0 hits - presumably because a multi-token query is
                // parsed as a phrase and "name"/"gmail" are not adjacent in the sample
                // (TODO confirm against QueryParser behaviour).
                string[] queries = new string[] { "ĞÜŞıöç", "name", "surname", "NAME.surname", "surname@gmail", "name@gmail", "456", "123.456" };
                int[] expectedHits = new int[] { 1, 1, 1, 1, 1, 0, 1, 1 };

                for (int q = 0; q < queries.Length; q++)
                {
                    TopDocs td = src.Search(p.Parse(queries[q]), 10);
                    Assert.AreEqual(expectedHits[q], td.TotalHits, "Unexpected hit count for query: " + queries[q]);
                }
            }
        }