Implements the fuzzy search query. The similarity measurement is based on the Damerau-Levenshtein (optimal string alignment) algorithm, though you can explicitly choose classic Levenshtein by passing false to the transpositions parameter.

This query uses MultiTermQuery.TopTermsScoringBooleanQueryRewrite as its default rewrite method, so terms are collected and scored according to their edit distance. Only the top terms are used for building the BooleanQuery. It is not recommended to change the rewrite mode for fuzzy queries.

At most, this query will match terms up to Lucene.Net.Util.Automaton.LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE edits. Higher distances (especially with transpositions enabled) are generally not useful and will match a significant portion of the term dictionary. If you really want this, consider using an n-gram indexing technique (such as the SpellChecker in the suggest module) instead.

NOTE: terms of length 1 or 2 will sometimes not match because of how the scaled distance between two terms is computed. For a term to match, the edit distance between the terms must be less than the length of the shorter term (either the input term or the candidate term). For example, FuzzyQuery on term "abcd" with maxEdits=2 will not match an indexed term "ab", and FuzzyQuery on term "a" with maxEdits=2 will not match an indexed term "abc".

Inheritance: MultiTermQuery
		/// <summary>
		/// Searches the "hottext", "text" and "examples" fields for <paramref name="term"/>
		/// and records the outcome in <c>Results</c>.
		/// </summary>
		/// <param name="term">Search term; lower-cased (culture-invariant) before querying.</param>
		/// <param name="count">Forwarded unchanged to <c>SearchInternal</c>.</param>
		/// <param name="start">Forwarded unchanged to <c>SearchInternal</c>.</param>
		/// <returns>
		/// The new <c>Result</c>, or <c>null</c> when an <see cref="IOException"/> is raised
		/// (reported on the console as a missing index).
		/// </returns>
		public Result Search (string term, int count, int start) {
			try {
				// Use invariant casing: the culture-sensitive ToLower () maps characters
				// differently under some locales (e.g. Turkish 'I'), which would make the
				// query term diverge from what was indexed.
				term = term.ToLowerInvariant ();

				// "hottext" field: fuzzy match at default boost, exact and prefix matches boosted to 10.
				Term hotTextTerm = new Term ("hottext", term);
				Query fuzzyHot = new FuzzyQuery (hotTextTerm);
				Query exactHot = new TermQuery (hotTextTerm);
				exactHot.Boost = 10f;
				Query prefixHot = new PrefixQuery (hotTextTerm);
				prefixHot.Boost = 10f;

				DisjunctionMaxQuery hotTextQuery = new DisjunctionMaxQuery (0f);
				hotTextQuery.Add (fuzzyHot);
				hotTextQuery.Add (exactHot);
				hotTextQuery.Add (prefixHot);

				// "text" and "examples" fields: exact term matches, each boosted to 3.
				Query textQuery = new TermQuery (new Term ("text", term));
				textQuery.Boost = 3f;
				Query examplesQuery = new TermQuery (new Term ("examples", term));
				examplesQuery.Boost = 3f;

				DisjunctionMaxQuery combined = new DisjunctionMaxQuery (0f);
				combined.Add (hotTextQuery);
				combined.Add (textQuery);
				combined.Add (examplesQuery);

				TopDocs top = SearchInternal (combined, count, start);
				Result result = new Result (term, searcher, top.ScoreDocs);
				Results.Add (result);
				return result;
			} catch (IOException) {
				Console.WriteLine ("No index in {0}", dir);
				return null;
			}
		}
        /// <summary>
        /// Indexes a fixed list of upper-case terms with a keyword-tokenizing analyzer and
        /// checks that a fuzzy query for "WEBER" (arguments 2 and 1) yields 8 hits.
        /// </summary>
        public virtual void Test2()
        {
            string[] indexedTerms =
            {
                "LANGE", "LUETH", "PIRSING", "RIEGEL", "TRZECZIAK", "WALKER",
                "WBR", "WE", "WEB", "WEBE", "WEBER", "WEBERE",
                "WEBREE", "WEBEREI", "WBRE", "WITTKOPF", "WOJNAROWSKI", "WRICKE"
            };

            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false));
            foreach (string value in indexedTerms)
            {
                AddDoc(value, indexWriter);
            }

            // Obtain the reader before the writer is disposed.
            IndexReader indexReader = indexWriter.Reader;
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            FuzzyQuery weberQuery = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
            ScoreDoc[] matched = indexSearcher.Search(weberQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(8, matched.Length);

            indexReader.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Indexes two terms in a RAM directory and runs four fuzzy queries at a minimum
        /// similarity of 0.9, checking the expected hit count of each one.
        /// </summary>
        public virtual void  TestTokenLengthOpt()
        {
            RAMDirectory ramDir      = new RAMDirectory();
            IndexWriter  indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            AddDoc("12345678911", indexWriter);
            AddDoc("segment", indexWriter);
            indexWriter.Optimize();
            indexWriter.Close();

            IndexSearcher indexSearcher = new IndexSearcher(ramDir);

            // Probe terms paired with the number of hits each must produce.
            string[] probes =
            {
                "1234569",      // term not over 10 chars, so the optimization shortcuts
                "1234567891",   // 10 chars, so no optimization
                "12345678911",  // over 10 chars, so no optimization
                "sdfsdfsdfsdf"  // over 10 chars, no match
            };
            int[] expectedHitCounts = { 0, 0, 1, 0 };

            for (int i = 0; i < probes.Length; i++)
            {
                Query fuzzy = new FuzzyQuery(new Term("field", probes[i]), 0.9f);
                ScoreDoc[] found = indexSearcher.Search(fuzzy, null, 1000).scoreDocs;
                Assert.AreEqual(expectedHitCounts[i], found.Length);
            }
        }
Exemple #4
0
        /// <summary>
        /// Checks that with <c>TopTermsBoostOnlyBooleanQueryRewrite</c> a fuzzy query for
        /// "lucene" returns all three documents, with both "Lucene" documents ahead of "Lucenne".
        /// </summary>
        public virtual void TestBoostOnlyRewrite()
        {
            // Values are indexed in this order and are also the expected ranking.
            string[] values = { "Lucene", "Lucene", "Lucenne" };

            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            foreach (string value in values)
            {
                AddDoc(value, indexWriter);
            }

            IndexReader indexReader = indexWriter.Reader;
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "lucene"));
            fuzzy.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));

            ScoreDoc[] ranked = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
            Assert.AreEqual(3, ranked.Length);

            // normally, 'Lucenne' would be the first result as IDF will skew the score.
            for (int i = 0; i < values.Length; i++)
            {
                Assert.AreEqual(values[i], indexReader.Document(ranked[i].Doc).Get("field"));
            }

            indexReader.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Indexes the values "fuzzy" and "wuzzy" under the "titulo" field, then checks that a
        /// fuzzy query for "wuzza" matches both and ranks "wuzzy" first.
        /// </summary>
        public void FuzzyQueryTest()
        {
            // The unused "texto" local of the original was removed (it was never read).
            string titleField = "titulo";
            using (var directory = new RAMDirectory())
            {
                // NOTE(review): both fields share the same name; presumably the helper
                // stores each one as its own document - confirm against IndexarArquivosEmDocumento.
                IndexarArquivosEmDocumento(directory, new Field[]
                                                          {
                                                              new Field(titleField, "fuzzy", Field.Store.YES, Field.Index.ANALYZED),
                                                              new Field(titleField, "wuzzy", Field.Store.YES, Field.Index.ANALYZED)
                                                          });

                using (var searcher = new IndexSearcher(directory, true))
                {
                    var query = new FuzzyQuery(new Term(titleField, "wuzza"));
                    var matches = searcher.Search(query, 10);

                    Assert.AreEqual(2, matches.TotalHits, "both close enough");
                    Assert.IsTrue(matches.ScoreDocs[0].Score != matches.ScoreDocs[1].Score, "wuzzy closer then fuzzy");

                    // The top-scoring document must be the closer term.
                    var topDoc = searcher.Doc(matches.ScoreDocs[0].Doc);
                    Assert.AreEqual("wuzzy", topDoc.Get(titleField), "wazza bear");
                }
            }
        }
Exemple #6
0
        /// <summary>
        /// Checks that with <c>TopTermsBoostOnlyBooleanQueryRewrite</c> a fuzzy query for
        /// "lucene" returns all three documents, with both "Lucene" documents ahead of "Lucenne".
        /// </summary>
        public virtual void TestBoostOnlyRewrite()
        {
            // Values are indexed in this order and are also the expected ranking.
            string[] values = { "Lucene", "Lucene", "Lucenne" };

            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            foreach (string value in values)
            {
                AddDoc(value, indexWriter);
            }

            IndexReader indexReader = indexWriter.GetReader();
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "lucene"));
            fuzzy.MultiTermRewriteMethod = new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50);

            ScoreDoc[] ranked = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
            Assert.AreEqual(3, ranked.Length);

            // normally, 'Lucenne' would be the first result as IDF will skew the score.
            for (int i = 0; i < values.Length; i++)
            {
                Assert.AreEqual(values[i], indexReader.Document(ranked[i].Doc).Get("field"));
            }

            indexReader.Dispose();
            dir.Dispose();
        }
Exemple #7
0
        /// <summary>
        /// Two fuzzy queries are equal when the base class considers them equal and they
        /// share the same minimum similarity and prefix length.
        /// </summary>
        /// <param name="o">Object to compare with this query.</param>
        /// <returns><c>true</c> when <paramref name="o"/> is an equivalent <c>FuzzyQuery</c>.</returns>
        public override bool Equals(System.Object o)
        {
            // Same instance: trivially equal.
            if (ReferenceEquals(this, o))
            {
                return true;
            }

            // Type check first, then the base-class comparison, as in the original order.
            FuzzyQuery other = o as FuzzyQuery;
            if (ReferenceEquals(other, null) || !base.Equals(o))
            {
                return false;
            }

            return minimumSimilarity == other.minimumSimilarity
                && prefixLength == other.prefixLength;
        }
Exemple #8
0
 /// <summary>
 /// Appends a textual dump of the visited fuzzy query to <c>_dump</c>: the opening
 /// "FuzzyQ(", the minimum similarity (only when the base visitor returned a
 /// <c>FuzzyQuery</c>), the boost text and the closing parenthesis.
 /// </summary>
 /// <param name="fuzzyq">Query handed to the base visitor.</param>
 /// <returns>Whatever the base visitor returned.</returns>
 public override Query VisitFuzzyQuery(FuzzyQuery fuzzyq)
 {
     _dump.Append("FuzzyQ(");

     Query visited = base.VisitFuzzyQuery(fuzzyq);

     // The base visitor may hand back a different query type; only dump
     // the similarity when the result is still a FuzzyQuery.
     if (visited is FuzzyQuery)
     {
         _dump.Append(", minSimilarity:");
         _dump.Append(((FuzzyQuery)visited).GetMinSimilarity());
     }

     _dump.Append(BoostToString(visited));
     _dump.Append(")");
     return visited;
 }
Exemple #9
0
        /// <summary>
        /// Two fuzzy queries are equal when the base class considers them equal, they have
        /// the same runtime type, and their max edits, prefix length, max expansions,
        /// transpositions flag and term all match.
        /// </summary>
        /// <param name="obj">Object to compare with this query.</param>
        /// <returns><c>true</c> when <paramref name="obj"/> is an equivalent query.</returns>
        public override bool Equals(object obj)
        {
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            // Base comparison runs before the runtime-type check, as in the original order.
            if (!base.Equals(obj) || this.GetType() != obj.GetType())
            {
                return false;
            }

            FuzzyQuery that = (FuzzyQuery)obj;

            bool sameSettings = maxEdits == that.maxEdits
                && prefixLength == that.prefixLength
                && maxExpansions == that.maxExpansions
                && transpositions == that.transpositions;
            if (!sameSettings)
            {
                return false;
            }

            // A null term only equals another null term.
            return term == null ? that.term == null : term.Equals(that.term);
        }
Exemple #10
0
        /// <summary>
        /// Builds two separate indexes, searches them together through a <c>MultiReader</c>
        /// with a fuzzy query for "z123456" (arguments 1, 0, 2, false) and expects 5 total hits.
        /// </summary>
        public virtual void TestTieBreaker()
        {
            string[] firstIndexTerms  = { "a123456", "c123456", "d123456", "e123456" };
            string[] secondIndexTerms = { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" };

            Directory dirA = NewDirectory();
            RandomIndexWriter writerA = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dirA);
            foreach (string value in firstIndexTerms)
            {
                AddDoc(value, writerA);
            }

            Directory dirB = NewDirectory();
            RandomIndexWriter writerB = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dirB);
            foreach (string value in secondIndexTerms)
            {
                AddDoc(value, writerB);
            }

            IndexReader readerA = writerA.GetReader();
            IndexReader readerB = writerB.GetReader();

            MultiReader combined = new MultiReader(readerA, readerB);
            IndexSearcher indexSearcher = NewSearcher(combined);
            FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "z123456"), 1, 0, 2, false);
            TopDocs top = indexSearcher.Search(fuzzy, 2);

            // 5 docs, from the a and b's
            Assert.AreEqual(5, top.TotalHits);

            combined.Dispose();
            readerA.Dispose();
            readerB.Dispose();
            writerA.Dispose();
            writerB.Dispose();
            dirA.Dispose();
            dirB.Dispose();
        }
Exemple #11
0
        /// <summary>
        /// Two fuzzy queries are equal when the base class considers them equal, they have
        /// the same runtime type, and their max edits, prefix length, max expansions,
        /// transpositions flag and term all match.
        /// </summary>
        /// <param name="obj">Object to compare with this query.</param>
        /// <returns><c>true</c> when <paramref name="obj"/> is an equivalent query.</returns>
        public override bool Equals(object obj)
        {
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            // Base comparison runs before the runtime-type check, as in the original order.
            if (!base.Equals(obj) || this.GetType() != obj.GetType())
            {
                return false;
            }

            FuzzyQuery that = (FuzzyQuery)obj;

            bool sameSettings = MaxEdits_Renamed == that.MaxEdits_Renamed
                && PrefixLength_Renamed == that.PrefixLength_Renamed
                && MaxExpansions == that.MaxExpansions
                && Transpositions_Renamed == that.Transpositions_Renamed;
            if (!sameSettings)
            {
                return false;
            }

            // A null term only equals another null term.
            return _term == null ? that._term == null : _term.Equals(that._term);
        }
        /// <summary>
        /// Lazily yields the "word" field of up to five documents that fuzzily match
        /// <paramref name="searchQuery"/> (minimum similarity 0.5), sorted by relevance.
        /// </summary>
        /// <param name="searchQuery">Text to look up in the "word" field.</param>
        /// <returns>
        /// The stored "word" values of the matching documents. Being an iterator, the
        /// search itself only runs once enumeration starts.
        /// </returns>
        public IEnumerable<string> Search(string searchQuery)
        {
            const int HitsLimit = 5;

            // The Stopwatch of the original was dropped: its elapsed time was never read.
            var query = new FuzzyQuery(new Term("word", searchQuery), 0.5f);
            var docs = SearchHandle.Searcher.Search(query, null, HitsLimit, Sort.RELEVANCE);

            foreach (var hit in docs.ScoreDocs)
            {
                yield return SearchHandle.Searcher.Doc(hit.Doc).Get("word");
            }
        }
Exemple #13
0
        /// <summary>
        /// Checks fuzzy queries built with an edit-count argument of 2: both "fouba" and
        /// "foubara" must find only "foobar". Also checks that constructing a query with an
        /// argument of 3 throws <see cref="System.ArgumentException"/>.
        /// </summary>
        public virtual void TestDistanceAsEditsSearching()
        {
            Directory         index = NewDirectory();
            RandomIndexWriter w     = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, index);

            AddDoc("foobar", w);
            AddDoc("test", w);
            AddDoc("working", w);
            IndexReader   reader   = w.GetReader();
            IndexSearcher searcher = NewSearcher(reader);

            w.Dispose();

            FuzzyQuery q = new FuzzyQuery(new Term("field", "fouba"), 2);

            ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

            q    = new FuzzyQuery(new Term("field", "foubara"), 2);
            hits = searcher.Search(q, 10).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

            try
            {
                q = new FuzzyQuery(new Term("field", "t"), 3);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // expected; the exception object is not inspected, so no variable
                // (and no warning-suppression pragma) is needed.
            }

            reader.Dispose();
            index.Dispose();
        }
Exemple #14
0
        /// <summary>
        /// Indexes a list of titles and names, then checks that a fuzzy query for "giga"
        /// with an edit-count argument of 0 returns exactly the "Giga byte" document.
        /// </summary>
        public virtual void TestGiga()
        {
            string[] contents =
            {
                "Lucene in Action",
                "Lucene for Dummies",
                "Giga byte",
                "ManagingGigabytesManagingGigabyte",
                "ManagingGigabytesManagingGigabytes",
                "The Art of Computer Science",
                "J. K. Rowling",
                "JK Rowling",
                "Joanne K Roling",
                "Bruce Willis",
                "Willis bruce",
                "Brute willis",
                "B. willis"
            };

            // NOTE(review): this analyzer is never handed to the writer; it is kept
            // because constructing it may draw from the test's Random - confirm.
            MockAnalyzer analyzer = new MockAnalyzer(Random);
            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            foreach (string content in contents)
            {
                AddDoc(content, indexWriter);
            }

            IndexReader indexReader = indexWriter.GetReader();
            indexWriter.Dispose();

            Query exactGiga = new FuzzyQuery(new Term("field", "giga"), 0);
            IndexSearcher indexSearcher = NewSearcher(indexReader);

            ScoreDoc[] found = indexSearcher.Search(exactGiga, 10).ScoreDocs;
            Assert.AreEqual(1, found.Length);
            Assert.AreEqual("Giga byte", indexSearcher.Doc(found[0].Doc).Get("field"));

            indexReader.Dispose();
            dir.Dispose();
        }
Exemple #15
0
        /// <summary>
        /// Indexes a fixed list of upper-case terms with a keyword-tokenizing analyzer and
        /// checks that a fuzzy query for "WEBER" (arguments 2 and 1) yields 8 hits.
        /// </summary>
        public virtual void Test2()
        {
            string[] indexedTerms =
            {
                "LANGE", "LUETH", "PIRSING", "RIEGEL", "TRZECZIAK", "WALKER",
                "WBR", "WE", "WEB", "WEBE", "WEBER", "WEBERE",
                "WEBREE", "WEBEREI", "WBRE", "WITTKOPF", "WOJNAROWSKI", "WRICKE"
            };

            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false));
            foreach (string value in indexedTerms)
            {
                AddDoc(value, indexWriter);
            }

            // Obtain the reader before the writer is disposed.
            IndexReader indexReader = indexWriter.GetReader();
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            FuzzyQuery weberQuery = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
            ScoreDoc[] matched = indexSearcher.Search(weberQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(8, matched.Length);

            indexReader.Dispose();
            dir.Dispose();
        }
Exemple #16
0
		/// <summary>
		/// Checks equality between identical wildcard queries, and inequality against
		/// null and against a fuzzy query built on the same term.
		/// </summary>
		public virtual void  TestEquals()
		{
			WildcardQuery first = new WildcardQuery(new Term("field", "b*a"));
			WildcardQuery second = new WildcardQuery(new Term("field", "b*a"));
			WildcardQuery third = new WildcardQuery(new Term("field", "b*a"));
			
			// equality must hold in both directions
			Assert.AreEqual(first, second);
			Assert.AreEqual(second, first);
			
			// ... and carry over to a third equal query
			Assert.AreEqual(second, third);
			Assert.AreEqual(first, third);
			
			// never equal to null
			Assert.IsFalse(first.Equals(null));
			
			// a different query type on the same term is not equal, either way round
			FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "b*a"));
			Assert.IsFalse(first.Equals(fuzzy));
			Assert.IsFalse(fuzzy.Equals(first));
		}
        /// <summary>
        /// Checks fuzzy queries built with an edit-count argument of 2: both "fouba" and
        /// "foubara" must find only "foobar". Also checks that constructing a query with an
        /// argument of 3 throws <see cref="ArgumentOutOfRangeException"/>.
        /// </summary>
        public virtual void TestDistanceAsEditsSearching()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            foreach (string value in new[] { "foobar", "test", "working" })
            {
                AddDoc(value, indexWriter);
            }

            IndexReader indexReader = indexWriter.GetReader();
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            // Each misspelling must resolve to the single document "foobar".
            foreach (string misspelling in new[] { "fouba", "foubara" })
            {
                FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", misspelling), 2);
                ScoreDoc[] found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
                Assert.AreEqual(1, found.Length);
                Assert.AreEqual("foobar", indexSearcher.Doc(found[0].Doc).Get("field"));
            }

            try
            {
                new FuzzyQuery(new Term("field", "t"), 3);
                Assert.Fail();
            }
            catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
            {
                // expected
            }

            indexReader.Dispose();
            dir.Dispose();
        }
Exemple #18
0
        /// <summary>
        /// Checks equality between identical wildcard queries, and inequality against
        /// null and against a fuzzy query built on the same term.
        /// </summary>
        public virtual void TestEquals()
        {
            WildcardQuery first = new WildcardQuery(new Term("field", "b*a"));
            WildcardQuery second = new WildcardQuery(new Term("field", "b*a"));
            WildcardQuery third = new WildcardQuery(new Term("field", "b*a"));

            // equality must hold in both directions
            Assert.AreEqual(first, second);
            Assert.AreEqual(second, first);

            // ... and carry over to a third equal query
            Assert.AreEqual(second, third);
            Assert.AreEqual(first, third);

            // never equal to null
            Assert.IsFalse(first.Equals(null));

            // a different query type on the same term is not equal, either way round
            FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "b*a"));
            Assert.IsFalse(first.Equals(fuzzy));
            Assert.IsFalse(fuzzy.Equals(first));
        }
Exemple #19
0
        /// <summary>
        /// Looks up the closest previously searched text for <paramref name="pattern"/>
        /// using a fuzzy query over the index at <c>m_HistoryPath</c>.
        /// </summary>
        /// <param name="pattern">Text to find a fuzzy match for.</param>
        /// <returns>
        /// The <c>Constants.SearchedText</c> field of the first hit, or an empty string
        /// when nothing matches or any exception occurs (best-effort lookup).
        /// </returns>
        public string DidYouMean(string pattern)
        {
            try
            {
                IndexSearcher searcher = new IndexSearcher(m_HistoryPath);
                try
                {
                    Term t = new Term(Constants.SearchedText, pattern);
                    FuzzyQuery query = new FuzzyQuery(t);

                    Hits hits = searcher.Search(query);

                    // The document must be read while the searcher is still open.
                    if (hits.Length() != 0)
                        return hits.Doc(0).Get(Constants.SearchedText);
                    else
                        return "";
                }
                finally
                {
                    // Release the index files; the original never closed the searcher.
                    searcher.Close();
                }
            }
            catch (Exception)
            {
                // Suggestions are optional: any failure simply yields no suggestion.
                return "";
            }
        }
Exemple #20
0
        /// <summary>
        /// Builds two separate indexes, searches them together through a <c>MultiReader</c>
        /// with a fuzzy query for "z123456" (arguments 1, 0, 2, false) and expects 5 total hits.
        /// </summary>
        public virtual void TestTieBreaker()
        {
            string[] firstIndexTerms  = { "a123456", "c123456", "d123456", "e123456" };
            string[] secondIndexTerms = { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" };

            Directory dirA = NewDirectory();
            RandomIndexWriter writerA = new RandomIndexWriter(Random(), dirA, Similarity, TimeZone);
            foreach (string value in firstIndexTerms)
            {
                AddDoc(value, writerA);
            }

            Directory dirB = NewDirectory();
            RandomIndexWriter writerB = new RandomIndexWriter(Random(), dirB, Similarity, TimeZone);
            foreach (string value in secondIndexTerms)
            {
                AddDoc(value, writerB);
            }

            IndexReader readerA = writerA.Reader;
            IndexReader readerB = writerB.Reader;

            MultiReader combined = new MultiReader(readerA, readerB);
            IndexSearcher indexSearcher = NewSearcher(combined);
            FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "z123456"), 1, 0, 2, false);
            TopDocs top = indexSearcher.Search(fuzzy, 2);

            // 5 docs, from the a and b's
            Assert.AreEqual(5, top.TotalHits);

            combined.Dispose();
            readerA.Dispose();
            readerB.Dispose();
            writerA.Dispose();
            writerB.Dispose();
            dirA.Dispose();
            dirB.Dispose();
        }
        /// <summary>
        /// Checks fuzzy queries built with an edit-count argument of 2: both "fouba" and
        /// "foubara" must find only "foobar". Also checks that constructing a query with an
        /// argument of 3 throws <see cref="System.ArgumentException"/>.
        /// </summary>
        public virtual void TestDistanceAsEditsSearching()
        {
            Directory         index = NewDirectory();
            RandomIndexWriter w     = new RandomIndexWriter(Random(), index);

            AddDoc("foobar", w);
            AddDoc("test", w);
            AddDoc("working", w);
            IndexReader   reader   = w.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            w.Dispose();

            FuzzyQuery q = new FuzzyQuery(new Term("field", "fouba"), 2);

            ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

            q    = new FuzzyQuery(new Term("field", "foubara"), 2);
            hits = searcher.Search(q, 10).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

            try
            {
                q = new FuzzyQuery(new Term("field", "t"), 3);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // expected; the exception object is not inspected, so the unused
                // variable (which raised compiler warning CS0168) was removed.
            }

            reader.Dispose();
            index.Dispose();
        }
Exemple #22
0
        /// <summary>
        /// Two fuzzy queries are equal when the base class considers them equal, they have
        /// the same runtime type, and their minimum similarity (compared by its raw bits),
        /// prefix length and term all match.
        /// </summary>
        /// <param name="obj">Object to compare with this query.</param>
        /// <returns><c>true</c> when <paramref name="obj"/> is an equivalent query.</returns>
        public override bool Equals(System.Object obj)
        {
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            // Base comparison runs before the runtime-type check, as in the original order.
            if (!base.Equals(obj) || GetType() != obj.GetType())
            {
                return false;
            }

            FuzzyQuery that = (FuzzyQuery)obj;

            // Compare the similarity values bit-for-bit rather than numerically.
            int ownBits   = BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0);
            int otherBits = BitConverter.ToInt32(BitConverter.GetBytes(that.minimumSimilarity), 0);
            if (ownBits != otherBits || prefixLength != that.prefixLength)
            {
                return false;
            }

            // A null term only equals another null term.
            return Term == null ? that.Term == null : Term.Equals(that.Term);
        }
Exemple #23
0
        /// <summary>
        /// Exercises <see cref="FuzzyQuery"/> with <c>FuzzyQuery.DefaultMaxEdits</c> against a small index of
        /// five-letter terms: hit counts, result ordering, the effect of the prefix length, the
        /// maxExpansions limit and isolation between fields.
        /// </summary>
        public virtual void TestFuzziness()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);

            AddDoc("aaaaa", writer);
            AddDoc("aaaab", writer);
            AddDoc("aaabb", writer);
            AddDoc("aabbb", writer);
            AddDoc("abbbb", writer);
            AddDoc("bbbbb", writer);
            AddDoc("ddddd", writer);

            IndexReader   reader   = writer.GetReader();
            IndexSearcher searcher = NewSearcher(reader);

            writer.Dispose();

            FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);

            // same with prefix: element i is the expected hit count for prefixLength i + 1
            int[] exactTermCounts = { 3, 3, 3, 2, 1, 1 };
            for (int prefix = 1; prefix <= exactTermCounts.Length; prefix++)
            {
                query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, prefix);
                hits  = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(exactTermCounts[prefix - 1], hits.Length, "prefixLength=" + prefix);
            }

            // test scoring
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "3 documents should match");
            IList <string> order = new List <string> {
                "bbbbb", "abbbb", "aabbb"
            };

            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                Assert.AreEqual(order[i], term);
            }

            // test pq size by supplying maxExpansions=2
            // this query would normally return 3 documents, because 3 terms match (see above):
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "only 2 documents should match");
            order = new List <string> {
                "bbbbb", "abbbb"
            };
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                Assert.AreEqual(order[i], term);
            }

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0); // edit distance to "aaaaa" = 3
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // Expected ranking for queries close to "aaaaa"; the default allows for up to two edits.
            string[] nearAaaaa = { "aaaaa", "aaaab", "aaabb" };

            // query identical to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            for (int i = 0; i < nearAaaaa.Length; i++)
            {
                Assert.AreEqual(nearAaaaa[i], searcher.Doc(hits[i].Doc).Get("field"));
            }

            // query similar to a word in the index, first without a prefix (0) and then with prefixes 1..3:
            for (int prefix = 0; prefix <= 3; prefix++)
            {
                query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, prefix);
                hits  = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(3, hits.Length, "prefixLength=" + prefix);
                for (int i = 0; i < nearAaaaa.Length; i++)
                {
                    Assert.AreEqual(nearAaaaa[i], searcher.Doc(hits[i].Doc).Get("field"));
                }
            }
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // "ddddX" matches "ddddd" without a prefix (0) and with prefixes 1..4:
            for (int prefix = 0; prefix <= 4; prefix++)
            {
                query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, prefix);
                hits  = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(1, hits.Length, "prefixLength=" + prefix);
                Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
            }
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // different field = no match:
            query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            directory.Dispose();
        }
        /// <summary>
        /// Runs the current <c>Query</c> against the Lucene index under <c>~/App_Data/index</c>, fills
        /// <c>Results</c> with the requested page of hits and binds it to <c>Repeater1</c>.
        /// </summary>
        /// <remarks>
        /// The query combines an exact match on "title" (boost 5) with low-boost fuzzy matches on the
        /// other searched fields. When the "Page" query-string value is one of the known types
        /// ("A", "B", "C"), documents whose "EXTPRP" matches any of the remaining types are excluded.
        /// When the session's "ProjectList" is non-empty, hits are restricted to documents whose
        /// directory is in that list. Also updates <c>total</c>, <c>startAt</c>, <c>duration</c>,
        /// <c>fromItem</c> and <c>toItem</c>.
        /// </remarks>
        private void search()
        {
            // UTC avoids a skewed duration if local time shifts (e.g. DST) while the search runs.
            DateTime start = DateTime.UtcNow;

            // create the result DataTable
            foreach (string column in new[] { "title", "sample", "path", "url", "Type" })
            {
                this.Results.Columns.Add(column, typeof(string));
            }

            // index is placed in "index" subdirectory
            string indexDirectory = Server.MapPath("~/App_Data/index");
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            List<string> searchFields = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };
            string[] imageExtensions = { ".png", ".jpg", ".gif", ".bmp" };

            // Read the session before opening the index so a bad cast cannot leak the searcher.
            List<string> projects = new List<string>();
            if (Session["ProjectList"] != null)
            {
                projects = (List<string>)Session["ProjectList"];
            }

            IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
            try
            {
                BooleanQuery bquery = new BooleanQuery();

                // Exclude every document type other than the one requested through ?Page=...
                List<string> allType = new List<string> { "A", "B", "C" };
                string requestedType = this.Request.QueryString["Page"];
                if (requestedType != null && allType.Contains(requestedType))
                {
                    allType.Remove(requestedType);
                    foreach (string type in allType)
                    {
                        TermQuery exactType = new TermQuery(new Term("EXTPRP", type));
                        bquery.Add(exactType, Occur.MUST_NOT);
                        FuzzyQuery fuzzyType = new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0);
                        bquery.Add(fuzzyType, Occur.MUST_NOT);
                    }
                }

                // Exact title matches are boosted; every other field contributes a weak fuzzy match.
                foreach (string field in searchFields)
                {
                    if (field == "title")
                    {
                        TermQuery titleQuery = new TermQuery(new Term(field, this.Query));
                        titleQuery.Boost = 5f;
                        bquery.Add(titleQuery, Occur.SHOULD);
                    }
                    else
                    {
                        FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(field, this.Query), 0.5f, 0);
                        fuzzyQuery.Boost = 0.1f;
                        bquery.Add(fuzzyQuery, Occur.SHOULD);
                    }
                }

                TopDocs hits = searcher.Search(bquery, null, 10000);

                // Keep only hits located directly in one of the selected project directories.
                if (projects.Count != 0)
                {
                    hits.ScoreDocs = hits.ScoreDocs
                        .Where(hit => projects.Contains(Path.GetDirectoryName(searcher.Doc(hit.Doc).Get("path"))))
                        .Distinct()
                        .ToArray();
                }

                this.total = hits.ScoreDocs.Length;
                this.startAt = InitStartAt();
                int resultsCount = Math.Min(total, this.maxResults + this.startAt);

                // create highlighter
                IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
                SimpleFragmenter fragmenter = new SimpleFragmenter(200);
                QueryScorer scorer = new QueryScorer(bquery);
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = fragmenter;

                for (int i = startAt; i < resultsCount; i++)
                {
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                    string path = doc.Get("path");
                    string extension = doc.Get("Extension");
                    string text = doc.Get("text");

                    // Ordinal comparison keeps the check independent of the thread culture;
                    // a missing extension is treated as "not an image".
                    bool isImage = imageExtensions.Any(
                        known => string.Equals(known, extension, StringComparison.OrdinalIgnoreCase));

                    // Images and documents without stored text get an empty sample.
                    string sample = "";
                    if (!isImage && text != null)
                    {
                        try
                        {
                            TokenStream stream = analyzer.TokenStream("", new StringReader(text));
                            sample = highlighter.GetBestFragment(stream, text);
                        }
                        catch (Exception ex)
                        {
                            // Highlighting is best-effort: the hit is still listed, but the failure is traced
                            // instead of being silently discarded.
                            System.Diagnostics.Trace.TraceWarning("Highlighting failed for '{0}': {1}", path, ex);
                        }
                    }

                    // create a new row with the result data
                    string url = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");
                    DataRow row = this.Results.NewRow();
                    row["title"] = doc.Get("title");
                    row["path"] = url;
                    row["url"] = url;
                    row["sample"] = sample;
                    if (path.Contains('.'))
                    {
                        row["Type"] = GetMIMEType(path);
                    }
                    this.Results.Rows.Add(row);
                }

                Repeater1.DataSource = Results;
                Repeater1.DataBind();
            }
            finally
            {
                // Release the index reader even when searching, highlighting or binding throws.
                searcher.Dispose();
            }

            // result information
            this.duration = DateTime.UtcNow - start;
            this.fromItem = startAt + 1;
            this.toItem = Math.Min(startAt + maxResults, total);
        }
Exemple #25
0
 /// <summary>
 /// Visits the term of a fuzzy query and rebuilds the query when the visit replaces the term.
 /// </summary>
 /// <param name="fuzzyq">The fuzzy query being visited.</param>
 /// <returns>
 /// <paramref name="fuzzyq"/> itself when <c>VisitTerm</c> returns the same term instance,
 /// <c>null</c> when it returns <c>null</c>, otherwise a new <see cref="FuzzyQuery"/> over the
 /// visited term that keeps the original minimum similarity, prefix length and boost.
 /// </returns>
 public override Query VisitFuzzyQuery(FuzzyQuery fuzzyq)
 {
     var term = fuzzyq.GetTerm();
     var visited = VisitTerm(term);
     if (term == visited)
         return fuzzyq;
     if (visited == null)
         return null;

     // Only the term changed: carry the fuzzy settings over so the rewritten query
     // does not silently fall back to the defaults.
     var rewritten = new FuzzyQuery(visited, fuzzyq.GetMinSimilarity(), fuzzyq.GetPrefixLength());
     rewritten.SetBoost(fuzzyq.GetBoost());
     return rewritten;
 }
Exemple #26
0
        /// <summary>
        /// Checks that a maximum edit distance of 2 finds "foobar" for two misspellings, and that
        /// constructing a <see cref="FuzzyQuery"/> with a distance of 3 throws an <see cref="System.ArgumentException"/>.
        /// </summary>
        public virtual void TestDistanceAsEditsSearching()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir);
            foreach (string value in new[] { "foobar", "test", "working" })
            {
                AddDoc(value, indexWriter);
            }
            IndexReader indexReader = indexWriter.Reader;
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            // Both misspellings must resolve to exactly one hit: "foobar".
            FuzzyQuery fuzzy;
            foreach (string misspelled in new[] { "fouba", "foubara" })
            {
                fuzzy = new FuzzyQuery(new Term("field", misspelled), 2);
                ScoreDoc[] found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
                Assert.AreEqual(1, found.Length);
                Assert.AreEqual("foobar", indexSearcher.Doc(found[0].Doc).Get("field"));
            }

            // An edit distance of 3 must be rejected by the constructor.
            try
            {
                fuzzy = new FuzzyQuery(new Term("field", "t"), 3);
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            indexReader.Dispose();
            dir.Dispose();
        }
Exemple #27
0
        /// <summary>
        /// Verifies the hit order of a fuzzy query for "lucene" when it is rewritten with
        /// <c>MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite</c>: both "Lucene" documents
        /// must rank ahead of "Lucenne".
        /// </summary>
        public virtual void TestBoostOnlyRewrite()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir);
            foreach (string value in new[] { "Lucene", "Lucene", "Lucenne" })
            {
                AddDoc(value, indexWriter);
            }

            IndexReader indexReader = indexWriter.Reader;
            IndexSearcher indexSearcher = NewSearcher(indexReader);
            indexWriter.Dispose();

            FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "lucene"));
            fuzzy.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
            ScoreDoc[] found = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;

            // normally, 'Lucenne' would be the first result as IDF will skew the score.
            string[] expectedOrder = { "Lucene", "Lucene", "Lucenne" };
            Assert.AreEqual(3, found.Length);
            for (int rank = 0; rank < expectedOrder.Length; rank++)
            {
                Assert.AreEqual(expectedOrder[rank], indexReader.Document(found[rank].Doc).Get("field"));
            }

            indexReader.Dispose();
            dir.Dispose();
        }
 /// <summary>
 /// Builds the SQL for a fuzzy query by delegating to the <see cref="TermQuery"/> overload
 /// with a term made of <c>FieldPlaceholder</c> and the fuzzy query's text.
 /// </summary>
 /// <param name="query">The fuzzy query whose term text is looked up.</param>
 /// <returns>The SQL produced for the equivalent exact term query.</returns>
 protected override ParameterizedSql BuildQuery(FuzzyQuery query)
 {
     // No fuzzy matching is performed: the text is searched as a plain term.
     var exactTerm = new Term(FieldPlaceholder, query.Term.Text);
     var exactQuery = new TermQuery(exactTerm);
     return BuildQuery(exactQuery);
 }
Exemple #29
0
        /// <summary>
        /// Runs a fuzzy search (minimum similarity 0.45) for <paramref name="search"/> on
        /// <paramref name="field"/> and appends the "Id" of each of the top 100 hits to
        /// <paramref name="manualIds"/> unless it is already present.
        /// </summary>
        /// <param name="field">Name of the indexed field to search.</param>
        /// <param name="search">Text to match fuzzily.</param>
        /// <param name="manualIds">Accumulator of ids found so far; modified in place.</param>
        /// <returns>The same <paramref name="manualIds"/> instance.</returns>
        private List<int> SearchManualField(string field, string search, List<int> manualIds)
        {
            var query = new FuzzyQuery(new Term(field, search), 0.45f);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);

            // using-blocks release the reader and searcher even when the search or id parsing throws.
            using (IndexReader reader = IndexReader.Open(directoryTemp, true))
            using (Searcher searcher = new IndexSearcher(reader))
            {
                searcher.Search(query, collector);
                ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

                foreach (ScoreDoc scoreDoc in hits)
                {
                    Document document = searcher.Doc(scoreDoc.Doc);
                    // The stored id is machine-written, so parse it independently of the thread culture.
                    int manualId = int.Parse(document.Get("Id"), System.Globalization.CultureInfo.InvariantCulture);
                    if (!manualIds.Contains(manualId))
                    {
                        manualIds.Add(manualId);
                    }
                }
            }

            return manualIds;
        }
        /// <summary>
        /// Exercises <see cref="FuzzyQuery"/> with seven-letter indexed terms ("aaaaaaa", "segment"):
        /// matches at the default minimum similarity, the effect of the prefix length, a raised
        /// minimum similarity, and rejection of similarity values outside the accepted range.
        /// </summary>
        public virtual void  TestFuzzinessLong()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            AddDoc("aaaaaaa", writer);
            AddDoc("segment", writer);
            writer.Optimize();
            writer.Close();
            IndexSearcher searcher = new IndexSearcher(directory);

            FuzzyQuery query;

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);
            // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
            // in testDefaultFuzziness so a bigger difference is allowed:
            query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("aaaaaaa", searcher.Doc(hits[0].doc).Get("field"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("aaaaaaa", searcher.Doc(hits[0].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 4);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("aaaaaaa", searcher.Doc(hits[0].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);

            // no match, more than half of the characters is wrong:
            query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);

            // now with prefix
            query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);

            // "student" and "stellent" are indeed similar to "segment" by default:
            query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);

            // now with prefix
            query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);
            query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);

            // "student" doesn't match anymore thanks to increased minimum similarity:
            query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0);
            hits  = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);

            // A minimum similarity above the accepted range must be rejected by the constructor.
            try
            {
                query = new FuzzyQuery(new Term("field", "student"), 1.1f);
                Assert.Fail("Expected IllegalArgumentException");
            }
            catch (System.ArgumentException)
            {
                // expecting exception
            }
            // A negative minimum similarity must be rejected as well.
            try
            {
                query = new FuzzyQuery(new Term("field", "student"), -0.1f);
                Assert.Fail("Expected IllegalArgumentException");
            }
            catch (System.ArgumentException)
            {
                // expecting exception
            }

            searcher.Close();
            directory.Close();
        }
Exemple #31
0
        //********************************************************************************************************************************
        private void search()
        {
            if (TextBoxQuery.Text != "")
            {
                DateTime start = DateTime.Now;
                // create the result DataTable
                this.Results.Columns.Add("title", typeof(string));
                this.Results.Columns.Add("sample", typeof(string));
                this.Results.Columns.Add("path", typeof(string));
                this.Results.Columns.Add("url", typeof(string));
                this.Results.Columns.Add("Type", typeof(string));

                // create the searcher
                // index is placed in "index" subdirectory
                string indexDirectory = Server.MapPath(IndexDirPath);
                var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                //   List<string> STOP_WORDS =  StopAnalyzer.ENGLISH_STOP_WORDS_SET.ToList<string>();
                IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
                BooleanQuery bquery = new BooleanQuery();
                //var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "text", analyzer);
                List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };
                List<string> Projects = new List<string>();
                if (Session["ProjectList"] != null)
                {
                    Projects = (List<string>)Session["ProjectList"];
                }

                List<string> allType = null;
                if (hnkClickLink.Value == "")
                {
                    allType = new List<string>();
                }
                else
                {
                    allType = new List<string> { "Doc", "Code", "Images", "Other" };
                }

                if (this.Request.QueryString["Page"] != null)
                {
                    if (allType.Contains(Convert.ToString(hnkClickLink.Value)))
                    {
                        allType.Remove(Convert.ToString(hnkClickLink.Value));
                        foreach (string type in allType)
                        {
                            TermQuery termq1 = new TermQuery(new Term("EXTPRP", type));
                            bquery.Add(termq1, Occur.MUST_NOT);
                            FuzzyQuery termq = new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0);
                            bquery.Add(termq, Occur.MUST_NOT);
                        }
                    }
                }

                //Query query = parser.Parse(this.Query);
                foreach (string term in SearchTerm)
                {
                    if (term == "title")
                    {
                        TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                        termq.Boost = 5f;
                        bquery.Add(termq, Occur.SHOULD);
                    }
                    else
                    {
                        TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                        termq.Boost = 0.1f;
                        bquery.Add(termq, Occur.SHOULD);
                    }
                }

                foreach (string term in SearchTerm)
                {
                    if (this.Query.Contains("."))
                    {
                        string SearchKeyword = this.Query.Replace(".", "");
                        if (term == "Extension")
                        {
                            TermQuery termq = new TermQuery(new Term(term, SearchKeyword.ToLower()));
                            termq.Boost = 5f;
                            bquery.Add(termq, Occur.SHOULD);
                        }
                    }
                    else
                    {
                        if (term == "title")
                        {
                            FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query.ToLower()));
                            termq.Boost = 5f;
                            bquery.Add(termq, Occur.SHOULD);
                        }
                        else
                        {
                            //FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query), 0.5f, 0);
                            //termq.Boost = 0.1f;
                            //bquery.Add(termq, Occur.SHOULD);
                        }
                    }
                }

                TopDocs hits = searcher.Search(bquery, null, 10000);

                //TopDocs hits = new TopDocs(TempArrList.Count(), TempArrList.ToArray(), hitsWithText.MaxScore);
                //hits.ScoreDocs.CopyTo(hits.ScoreDocs, 0);
                //hits.ScoreDocs = hits.ScoreDocs.OrderBy(obj => searcher.Doc(obj.Doc).Get("path")).ToArray();

                if (Projects.Count() != 0)
                {
                    hits.ScoreDocs = hits.ScoreDocs.Where(obj => Projects.Contains(SplitPath(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path"))))).Distinct().ToArray();
                }

                this.total = hits.ScoreDocs.Count();

                this.startAt = InitStartAt();

                int resultsCount = Math.Min(total, this.maxResults + this.startAt);

                // create highlighter
                IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
                SimpleFragmenter fragmenter = new SimpleFragmenter(200);
                QueryScorer scorer = new QueryScorer(bquery);
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = fragmenter;
                //highlighter.MaxDocCharsToAnalyze=200;

                //for (int i = startAt; i < resultsCount; i++)
                //{
                //    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                //    String path = doc.Get("path");
                //    string getExtension = doc.Get("Extension");

                //    TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
                //    String sample = "";
                //    try
                //    {
                //        string document = doc.Get("text");
                //        if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
                //        {
                //            sample = "";
                //        }
                //        else
                //        {
                //            string outp = highlighter.GetBestFragment(stream, document);
                //            if (outp != null)
                //                sample = ReplaceSpecialChar(outp.Trim()); //, 2, "...");
                //            else
                //                sample = Limit(doc.Get("text").Trim(), 200);
                //        }

                //    }
                //    catch (Exception ex)
                //    {
                //    }

                //    // create a new row with the result data
                //    DataRow row = this.Results.NewRow();
                //    row["title"] = doc.Get("title");
                //    row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                //    row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                //    row["sample"] = sample;
                //    if (path.Contains('.'))
                //    {
                //        row["Type"] = GetMIMEType(path);
                //    }

                //    this.Results.Rows.Add(row);
                //}

                for (int i = 0; i < this.total; i++)
                {
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                    String path = doc.Get("path");
                    string getExtension = doc.Get("Extension");

                    TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
                    String sample = "";
                    try
                    {
                        string document = doc.Get("text");
                        if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
                        {
                            sample = "";
                        }
                        else
                        {
                            string outp = highlighter.GetBestFragment(stream, document);
                            if (outp != null)
                                sample = Limit(outp.Trim(), 200); //, 2, "...");
                            else
                                sample = Limit(doc.Get("text").Trim(), 200);
                        }

                    }
                    catch (Exception ex)
                    {
                    }

                    // create a new row with the result data
                    DataRow row = Results.NewRow();
                    row["title"] = doc.Get("title");
                    row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                    row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                    row["sample"] = sample;
                    if (path.Contains('.'))
                    {
                        row["Type"] = GetMIMEType(path);
                    }

                    Results.Rows.Add(row);
                }

                //****************************** Logic for Paging for Repeater Control****************************************
                PagedDataSource pgitems = new PagedDataSource();
                DataView dv = new DataView(Results);
                pgitems.DataSource = dv;

                pgitems.AllowPaging = true;

                pgitems.PageSize = 10;//You can set the number of items here using some logic.

                pgitems.CurrentPageIndex = PageNumber;

                btnPrev.Visible = !pgitems.IsFirstPage;
                btnNext.Visible = !pgitems.IsLastPage;

                if (pgitems.PageCount > 1)
                {
                    rptPages.Visible = true;
                    ArrayList pages = new ArrayList();
                    for (int i = PageNumber; i < 5 + PageNumber; i++)
                        pages.Add((i + 1).ToString());
                    rptPages.DataSource = pages;
                    rptPages.DataBind();
                }
                else
                    rptPages.Visible = false;

                Repeater1.DataSource = pgitems;
                Repeater1.DataBind();
                //*************************************************************************************************************

                //Repeater1.DataSource = Results;
                //Repeater1.DataBind();

                searcher.Dispose();

                // result information
                this.duration = DateTime.Now - start;
                this.fromItem = startAt + 1;
                this.toItem = Math.Min(startAt + maxResults, total);
            }
        }
Exemple #32
0
        /// <summary>
        /// Builds the query used to look up <paramref name="termQuery"/> in the "name" field.
        /// </summary>
        /// <param name="termQuery">Text to search for.</param>
        /// <returns>
        /// A prefix query when the text is shorter than three characters; otherwise a boolean
        /// query in which a prefix clause and a fuzzy clause are both optional (SHOULD).
        /// </returns>
        private static Query BuildTermQuery(string termQuery)
        {
            // Very short input is matched by prefix only
            // (presumably because fuzzy matching is unreliable on 1-2 character terms).
            if (termQuery.Length < 3)
            {
                return new PrefixQuery(new Term("name", termQuery));
            }

            PrefixQuery prefixClause = new PrefixQuery(new Term("name", termQuery));
            FuzzyQuery fuzzyClause = new FuzzyQuery(new Term("name", termQuery));

            // Either clause may match; the prefix clause is added first, then the fuzzy one.
            BooleanQuery combined = new BooleanQuery();
            combined.Add(prefixClause, BooleanClause.Occur.SHOULD);
            combined.Add(fuzzyClause, BooleanClause.Occur.SHOULD);
            return combined;
        }
		/// <summary>
		/// Indexes the terms "12345678911" and "segment" and runs fuzzy queries with a minimum
		/// similarity of 0.9 using query terms of different lengths, checking the hit count of each.
		/// </summary>
		public virtual void TestTokenLengthOpt()
		{
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			AddDoc("12345678911", writer);
			AddDoc("segment", writer);
			writer.Optimize();
			writer.Close();
			IndexSearcher searcher = new IndexSearcher(directory, true);

			Query query;
			// term not over 10 chars, so optimization shortcuts
			query = new FuzzyQuery(new Term("field", "1234569"), 0.9f);
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// 10 chars, so no optimization
			query = new FuzzyQuery(new Term("field", "1234567891"), 0.9f);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// over 10 chars, so no optimization
			query = new FuzzyQuery(new Term("field", "12345678911"), 0.9f);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);

			// over 10 chars, no match
			query = new FuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// Release the searcher and the in-memory index, as the other fuzzy tests do.
			searcher.Close();
			directory.Close();
		}
		/// <summary>
		/// Runs fuzzy queries against an index holding the seven-character terms "aaaaaaa" and
		/// "segment", varying the prefix length and the minimum similarity, and verifies that the
		/// FuzzyQuery constructor rejects minimum similarities of 1.1 and -0.1.
		/// </summary>
		public virtual void TestFuzzinessLong()
		{
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			AddDoc("aaaaaaa", writer);
			AddDoc("segment", writer);
			writer.Optimize();
			writer.Close();
			IndexSearcher searcher = new IndexSearcher(directory, true);

			FuzzyQuery query;
			// not similar enough:
			query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);
			// edit distance to "aaaaaaa" = 3, this matches because the string is longer than
			// in testDefaultFuzziness so a bigger difference is allowed:
			query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			// Expected value goes first so that failure messages read correctly.
			Assert.AreEqual("aaaaaaa", searcher.Doc(hits[0].Doc).Get("field"));

			// now with prefix
			query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 1);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			Assert.AreEqual("aaaaaaa", searcher.Doc(hits[0].Doc).Get("field"));
			query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 4);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			Assert.AreEqual("aaaaaaa", searcher.Doc(hits[0].Doc).Get("field"));
			query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// no match, more than half of the characters is wrong:
			query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// now with prefix
			query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// "student" and "stellent" are indeed similar to "segment" by default:
			query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);

			// now with prefix
			query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 1);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 1);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 2);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);
			query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 2);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// "student" doesn't match anymore thanks to increased minimum similarity:
			query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0);
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length);

			// Minimum similarities of 1.1 and -0.1 must be rejected by the constructor.
			Assert.Throws<ArgumentException>(() => new FuzzyQuery(new Term("field", "student"), 1.1f),
			                                 "Expected ArgumentException");
			Assert.Throws<ArgumentException>(() => new FuzzyQuery(new Term("field", "student"), -0.1f),
			                                 "Expected ArgumentException");

			searcher.Close();
			directory.Close();
		}
 /// <summary>
 /// Writes the details of a fuzzy query to the supplied writer: its MinSimilarity and
 /// PrefixLength values, followed by its term, which is handed to
 /// AzureQueryLogger.VisitTerm under the label "Fuzzy Term".
 /// </summary>
 /// <param name="query">The fuzzy query whose settings and term are written.</param>
 /// <param name="writer">The indented writer that receives the output.</param>
 private static void MultiTermQuery(FuzzyQuery query, AzureQueryLogger.IndentedTextWriter writer)
 {
     // NOTE(review): the explicit (object) casts may be selecting a particular WriteLine overload - confirm before removing.
     writer.WriteLine("MinSimilarity: {0}", (object)query.MinSimilarity);
     writer.WriteLine("PrefixLength: {0}", (object)query.PrefixLength);
     AzureQueryLogger.VisitTerm(query.Term, "Fuzzy Term", writer);
 }
Exemple #36
0
        /// <summary>
        /// Creates a fuzzy query for an explicit term in the given field and adds it to this builder.
        /// </summary>
        /// <param name="fieldName">The field name to search within.</param>
        /// <param name="fieldValue">The value to match.</param>
        /// <param name="occur">Whether the term must, must not or should occur in the field; forwarded to <c>Add</c>.</param>
        /// <param name="boost">A boost multiplier (1 is default / normal); forwarded to <c>SetBoostValue</c>.</param>
        /// <param name="key">The dictionary key to allow reference beyond the initial scope; forwarded to <c>Add</c>.</param>
        /// <param name="caseSensitive">Whether or not to retain case; forwarded to <c>GetTerm</c>.</param>
        /// <returns>The generated fuzzy query object.</returns>
        public virtual FuzzyQuery Fuzzy(string fieldName, string fieldValue, BooleanClause.Occur occur = null, float? boost = null, string key = null, bool? caseSensitive = null)
        {
            // Build the term first, then wrap it in a fuzzy query with default settings.
            FuzzyQuery fuzzyQuery = new FuzzyQuery(GetTerm(fieldName, fieldValue, caseSensitive));

            // Apply the boost before the query is registered.
            SetBoostValue(fuzzyQuery, boost);
            Add(fuzzyQuery, occur, key);

            return fuzzyQuery;
        }
        /// <summary>
        /// Runs fuzzy queries against an index of seven five-character terms ("aaaaa" through
        /// "bbbbb", plus "ddddd"), varying the query term and prefix length, and checks both the
        /// number of hits and the order in which the matching terms are returned.
        /// </summary>
        public virtual void TestFuzziness()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            AddDoc("aaaaa", writer);
            AddDoc("aaaab", writer);
            AddDoc("aaabb", writer);
            AddDoc("aabbb", writer);
            AddDoc("abbbb", writer);
            AddDoc("bbbbb", writer);
            AddDoc("ddddd", writer);
            writer.Optimize();
            writer.Close();
            IndexSearcher searcher = new IndexSearcher(directory);

            FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);

            ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);

            // same with prefix
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 1);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 2);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 3);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 4);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 5);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 6);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);
            // edit distance to "aaaaa" = 3
            query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);

            // query identical to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].doc).Get("field"));
            // default allows for up to two edits:
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].doc).Get("field"));

            // query similar to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].doc).Get("field"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 1);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 2);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 3);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 4);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 5);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);


            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].doc).Get("field"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 1);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 2);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 3);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 4);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 5);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);


            // different field = no match:
            query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, hits.Length);

            searcher.Close();
            directory.Close();
        }
Exemple #38
0
        /// <summary>
        /// Runs fuzzy queries against an index of seven five-character terms ("aaaaa" through
        /// "bbbbb", plus "ddddd"), varying the query term and prefix length. Checks hit counts,
        /// the order of the returned terms, and that lowering BooleanQuery.MaxClauseCount to 2
        /// reduces the number of matching documents to two.
        /// </summary>
        public virtual void TestFuzziness()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true,
                                                 IndexWriter.MaxFieldLength.LIMITED);

            AddDoc("aaaaa", writer);
            AddDoc("aaaab", writer);
            AddDoc("aaabb", writer);
            AddDoc("aabbb", writer);
            AddDoc("abbbb", writer);
            AddDoc("bbbbb", writer);
            AddDoc("ddddd", writer);
            writer.Optimize();
            writer.Close();
            IndexSearcher searcher = new IndexSearcher(directory, true);

            FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);

            // same with prefix
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 1);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 2);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 3);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 4);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 5);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 6);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // test scoring
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "3 documents should match");
            List<String> order = new List<string>(new[] { "bbbbb", "abbbb", "aabbb" });

            for (int i = 0; i < hits.Length; i++)
            {
                String term = searcher.Doc(hits[i].Doc).Get("field");
                Assert.AreEqual(order[i], term);
            }

            // test BooleanQuery.maxClauseCount
            int savedClauseCount = BooleanQuery.MaxClauseCount;

            try
            {
                BooleanQuery.MaxClauseCount = 2;
                // This query would normally return 3 documents, because 3 terms match (see above):
                query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(2, hits.Length, "only 2 documents should match");
                order = new List<string>(new[] { "bbbbb", "abbbb" });
                for (int i = 0; i < hits.Length; i++)
                {
                    String term = searcher.Doc(hits[i].Doc).Get("field");
                    Assert.AreEqual(order[i], term);
                }
            }
            finally
            {
                // Restore the static limit so that other tests are not affected.
                BooleanQuery.MaxClauseCount = savedClauseCount;
            }

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
            // edit distance to "aaaaa" = 3
            query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // query identical to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
            // default allows for up to two edits:
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));

            // query similar to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 1);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 2);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 3);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
            Assert.AreEqual("aaabb", searcher.Doc(hits[2].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 4);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            Assert.AreEqual("aaaaa", searcher.Doc(hits[0].Doc).Get("field"));
            Assert.AreEqual("aaaab", searcher.Doc(hits[1].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 5);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);


            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 1);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 2);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 3);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 4);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("ddddd", searcher.Doc(hits[0].Doc).Get("field"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 5);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);


            // different field = no match:
            query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            searcher.Close();
            directory.Close();
        }
        /// <summary>
        /// Checks how the parser treats the "~" fuzzy suffix: a numeric suffix yields a
        /// FuzzyQuery with that many edits, a missing or non-numeric suffix yields a plain
        /// TermQuery, and an oversized suffix is expected to produce a query using
        /// LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE.
        /// </summary>
        public virtual void TestFuzzy()
        {
            Term foobar = new Term("field", "foobar");
            Query plainTerm = new TermQuery(foobar);
            Query twoEdits = new FuzzyQuery(foobar, 2);

            assertEquals(twoEdits, Parse("foobar~2"));

            // A missing or malformed edit count is expected to parse as an ordinary term query.
            foreach (string input in new[] { "foobar~", "foobar~a", "foobar~1a" })
            {
                assertEquals(plainTerm, Parse(input));
            }

            BooleanQuery expectedConjunction = new BooleanQuery();
            FuzzyQuery cappedFuzzy = new FuzzyQuery(new Term("field", "foo"), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
            expectedConjunction.Add(cappedFuzzy, BooleanClause.Occur.MUST);
            expectedConjunction.Add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST);

            // NOTE(review): this is string concatenation, so the constant's digits are followed by
            // a literal "1" rather than being incremented - either way the requested distance
            // exceeds the supported maximum; confirm which input was intended.
            string oversized = "foo~" + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + 1 + " bar";
            assertEquals(expectedConjunction, Parse(oversized));
        }
Exemple #40
0
        /// <summary>
        /// Searches two separate indexes through a single MultiReader with a fuzzy query
        /// for "z123456" and checks the total number of hits reported.
        /// </summary>
        public virtual void TestTieBreaker()
        {
            // First index: four distinct terms.
            Directory firstDir = NewDirectory();
            RandomIndexWriter firstWriter = new RandomIndexWriter(Random(), firstDir);
            foreach (string value in new[] { "a123456", "c123456", "d123456", "e123456" })
            {
                AddDoc(value, firstWriter);
            }

            // Second index: "b123456" appears in three documents.
            Directory secondDir = NewDirectory();
            RandomIndexWriter secondWriter = new RandomIndexWriter(Random(), secondDir);
            foreach (string value in new[] { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" })
            {
                AddDoc(value, secondWriter);
            }

            IndexReader firstReader = firstWriter.Reader;
            IndexReader secondReader = secondWriter.Reader;
            MultiReader combinedReader = new MultiReader(firstReader, secondReader);
            IndexSearcher searcher = NewSearcher(combinedReader);

            // Arguments after the term: maxEdits = 1, prefixLength = 0, maxExpansions = 2,
            // transpositions = false (classic Levenshtein).
            FuzzyQuery tieQuery = new FuzzyQuery(new Term("field", "z123456"), 1, 0, 2, false);
            TopDocs topDocs = searcher.Search(tieQuery, 2);
            Assert.AreEqual(5, topDocs.TotalHits); // 5 docs, from the a and b's

            // Tear down in the same order as before: readers, then writers, then directories.
            combinedReader.Dispose();
            firstReader.Dispose();
            secondReader.Dispose();
            firstWriter.Dispose();
            secondWriter.Dispose();
            firstDir.Dispose();
            secondDir.Dispose();
        }
		/// <summary>
		/// Checks hit counts and hit order of <c>FuzzyQuery</c> against a small in-memory index
		/// of five-letter terms, using the default minimum similarity and prefix lengths 0 to 6.
		/// </summary>
		public virtual void TestFuzziness()
		{
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			string[] indexedTerms = new string[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" };
			foreach (string indexedTerm in indexedTerms)
			{
				AddDoc(indexedTerm, writer);
			}
			writer.Optimize();
			writer.Close();
			IndexSearcher searcher = new IndexSearcher(directory);

			FuzzyQuery query;
			Hits hits;

			// Query identical to an indexed term; the array index is the prefix length.
			int[] exactTermHitCounts = new int[] { 3, 3, 3, 3, 2, 1, 1 };
			for (int prefixLength = 0; prefixLength < exactTermHitCounts.Length; prefixLength++)
			{
				query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, prefixLength);
				hits = searcher.Search(query);
				Assert.AreEqual(exactTermHitCounts[prefixLength], hits.Length());
			}

			// not similar enough:
			query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());
			query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3
			hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());

			// Expected hits, in rank order. Assertions below pass (expected, actual) so that
			// failure messages report the two values the right way round.
			string[] closestThree = new string[] { "aaaaa", "aaaab", "aaabb" };
			string[] closestTwo = new string[] { "aaaaa", "aaaab" };
			string[] onlyD = new string[] { "ddddd" };
			string[] noHits = new string[0];

			// query identical to a word in the index (default allows for up to two edits):
			query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query);
			Assert.AreEqual(closestThree.Length, hits.Length());
			for (int i = 0; i < closestThree.Length; i++)
			{
				Assert.AreEqual(closestThree[i], hits.Doc(i).Get("field"));
			}

			// query similar to a word in the index, first without and then with a prefix;
			// the array index is the prefix length.
			string[][] similarTermExpected = new string[][] { closestThree, closestThree, closestThree, closestThree, closestTwo, noHits };
			for (int prefixLength = 0; prefixLength < similarTermExpected.Length; prefixLength++)
			{
				string[] expected = similarTermExpected[prefixLength];
				query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, prefixLength);
				hits = searcher.Search(query);
				Assert.AreEqual(expected.Length, hits.Length());
				for (int i = 0; i < expected.Length; i++)
				{
					Assert.AreEqual(expected[i], hits.Doc(i).Get("field"));
				}
			}

			// "ddddX" differs from "ddddd" only in the last character, so it matches until the
			// required prefix covers that character (prefix length 5).
			string[][] lastCharDiffersExpected = new string[][] { onlyD, onlyD, onlyD, onlyD, onlyD, noHits };
			for (int prefixLength = 0; prefixLength < lastCharDiffersExpected.Length; prefixLength++)
			{
				string[] expected = lastCharDiffersExpected[prefixLength];
				query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, prefixLength);
				hits = searcher.Search(query);
				Assert.AreEqual(expected.Length, hits.Length());
				for (int i = 0; i < expected.Length; i++)
				{
					Assert.AreEqual(expected[i], hits.Doc(i).Get("field"));
				}
			}

			// different field = no match:
			query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());

			searcher.Close();
			directory.Close();
		}
Exemple #42
0
        /// <summary>
        /// Indexes a handful of titles and names, then checks that a fuzzy query for "giga"
        /// built with a second constructor argument of 0 returns exactly the "Giga byte" document.
        /// </summary>
        public virtual void TestGiga()
        {
            // Never referenced again, but constructing it still calls Random(), so it is kept.
            MockAnalyzer unusedAnalyzer = new MockAnalyzer(Random());
            Directory index = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), index);

            // Documents are added in exactly this order.
            string[] fieldValues =
            {
                "Lucene in Action",
                "Lucene for Dummies",
                "Giga byte",
                "ManagingGigabytesManagingGigabyte",
                "ManagingGigabytesManagingGigabytes",
                "The Art of Computer Science",
                "J. K. Rowling",
                "JK Rowling",
                "Joanne K Roling",
                "Bruce Willis",
                "Willis bruce",
                "Brute willis",
                "B. willis"
            };
            foreach (string fieldValue in fieldValues)
            {
                AddDoc(fieldValue, indexWriter);
            }

            IndexReader reader = indexWriter.Reader;
            indexWriter.Dispose();

            Query gigaQuery = new FuzzyQuery(new Term("field", "giga"), 0);

            // Run the search and check the single expected hit.
            IndexSearcher searcher = NewSearcher(reader);
            TopDocs topDocs = searcher.Search(gigaQuery, 10);
            ScoreDoc[] hits = topDocs.ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));

            reader.Dispose();
            index.Dispose();
        }
		/// <summary>
		/// Checks <c>FuzzyQuery</c> matching against seven-letter indexed terms for several
		/// prefix lengths, a raised minimum similarity, and minimum-similarity values that the
		/// constructor is expected to reject with <see cref="System.ArgumentException"/>.
		/// </summary>
		public virtual void TestFuzzinessLong()
		{
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			AddDoc("aaaaaaa", writer);
			AddDoc("segment", writer);
			writer.Optimize();
			writer.Close();
			IndexSearcher searcher = new IndexSearcher(directory);

			FuzzyQuery query;
			// not similar enough:
			query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
			Hits hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());

			// edit distance to "aaaaaaa" = 3, this matches because the string is longer than
			// in testDefaultFuzziness so a bigger difference is allowed.
			// Checked without a prefix (0) and then with prefixes 1 and 4.
			int[] matchingPrefixLengths = new int[] { 0, 1, 4 };
			foreach (int prefixLength in matchingPrefixLengths)
			{
				query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, prefixLength);
				hits = searcher.Search(query);
				Assert.AreEqual(1, hits.Length());
				// (expected, actual) order so a failure message reports the values correctly.
				Assert.AreEqual("aaaaaaa", hits.Doc(0).Get("field"));
			}
			query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5);
			hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());

			// no match, more than half of the characters is wrong:
			query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0);
			hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());

			// now with prefix
			query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2);
			hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());

			// "student" and "stellent" are indeed similar to "segment" by default.
			// The array index is the prefix length: both match with prefix 0 and 1, neither with 2.
			string[] similarToSegment = new string[] { "student", "stellent" };
			int[] hitCountByPrefix = new int[] { 1, 1, 0 };
			for (int prefixLength = 0; prefixLength < hitCountByPrefix.Length; prefixLength++)
			{
				foreach (string text in similarToSegment)
				{
					query = new FuzzyQuery(new Term("field", text), FuzzyQuery.defaultMinSimilarity, prefixLength);
					hits = searcher.Search(query);
					Assert.AreEqual(hitCountByPrefix[prefixLength], hits.Length());
				}
			}

			// "student" doesn't match anymore thanks to increased minimum similarity:
			query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0);
			hits = searcher.Search(query);
			Assert.AreEqual(0, hits.Length());

			// Minimum similarity values of 1.1 and -0.1 must be rejected by the constructor.
			try
			{
				query = new FuzzyQuery(new Term("field", "student"), 1.1f);
				Assert.Fail("Expected ArgumentException");
			}
			catch (System.ArgumentException)
			{
				// expecting exception
			}
			try
			{
				query = new FuzzyQuery(new Term("field", "student"), - 0.1f);
				Assert.Fail("Expected ArgumentException");
			}
			catch (System.ArgumentException)
			{
				// expecting exception
			}

			searcher.Close();
			directory.Close();
		}
Exemple #44
0
        /// <summary>
        /// Checks hit counts, scoring order and the maxExpansions limit of <c>FuzzyQuery</c>
        /// against a small index of five-letter terms, using the default maximum edits and
        /// prefix lengths 0 to 6.
        /// </summary>
        public virtual void TestFuzziness()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
            foreach (string indexedTerm in new[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" })
            {
                AddDoc(indexedTerm, writer);
            }

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);
            writer.Dispose();

            FuzzyQuery query;
            ScoreDoc[] hits;

            // Query identical to an indexed term; the array index is the prefix length.
            int[] exactTermHitCounts = { 3, 3, 3, 3, 2, 1, 1 };
            for (int prefixLength = 0; prefixLength < exactTermHitCounts.Length; prefixLength++)
            {
                query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, prefixLength);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(exactTermHitCounts[prefixLength], hits.Length);
            }

            // test scoring
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "3 documents should match");
            string[] order = { "bbbbb", "abbbb", "aabbb" };
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                Assert.AreEqual(order[i], term);
            }

            // test pq size by supplying maxExpansions=2
            // this query would normally return 3 documents, because 3 terms match (see above):
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "only 2 documents should match");
            order = new[] { "bbbbb", "abbbb" };
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                Assert.AreEqual(order[i], term);
            }

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0); // edit distance to "aaaaa" = 3
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // Expected hits, in rank order. Assertions below pass (expected, actual) so that
            // failure messages report the two values the right way round.
            string[] closestThree = { "aaaaa", "aaaab", "aaabb" };
            string[] closestTwo = { "aaaaa", "aaaab" };
            string[] onlyD = { "ddddd" };
            string[] noHits = new string[0];

            // query identical to a word in the index (default allows for up to two edits):
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(closestThree.Length, hits.Length);
            for (int i = 0; i < closestThree.Length; i++)
            {
                Assert.AreEqual(closestThree[i], searcher.Doc(hits[i].Doc).Get("field"));
            }

            // query similar to a word in the index, first without and then with a prefix;
            // the array index is the prefix length.
            string[][] similarTermExpected = { closestThree, closestThree, closestThree, closestThree, closestTwo, noHits };
            for (int prefixLength = 0; prefixLength < similarTermExpected.Length; prefixLength++)
            {
                string[] expected = similarTermExpected[prefixLength];
                query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, prefixLength);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(expected.Length, hits.Length);
                for (int i = 0; i < expected.Length; i++)
                {
                    Assert.AreEqual(expected[i], searcher.Doc(hits[i].Doc).Get("field"));
                }
            }

            // "ddddX" differs from "ddddd" only in the last character, so it matches until the
            // required prefix covers that character (prefix length 5).
            string[][] lastCharDiffersExpected = { onlyD, onlyD, onlyD, onlyD, onlyD, noHits };
            for (int prefixLength = 0; prefixLength < lastCharDiffersExpected.Length; prefixLength++)
            {
                string[] expected = lastCharDiffersExpected[prefixLength];
                query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, prefixLength);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(expected.Length, hits.Length);
                for (int i = 0; i < expected.Length; i++)
                {
                    Assert.AreEqual(expected[i], searcher.Doc(hits[i].Doc).Get("field"));
                }
            }

            // different field = no match:
            query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            directory.Dispose();
        }
 /// <summary>
 /// Produces a <see cref="ParameterizedSql"/> from the given fuzzy query.
 /// Abstract: each derived class supplies its own implementation.
 /// </summary>
 /// <param name="query">The fuzzy query to build from.</param>
 protected abstract ParameterizedSql BuildQuery(FuzzyQuery query);
Exemple #46
0
 /// <summary>
 /// Factory method to generate a fuzzy query.
 /// One <see cref="FuzzyQuery"/> is created per entry in <c>weights</c>, using the entry's
 /// key as the field name and its value as the boost. The queries are added as SHOULD
 /// clauses of a single <see cref="BooleanQuery"/>, which is then passed through
 /// <c>Simplify</c>.
 /// </summary>
 /// <param name="text">Term text to search for in every weighted field.</param>
 /// <param name="fuzziness">Value passed as the second constructor argument of each <see cref="FuzzyQuery"/>.</param>
 /// <returns>The result of <c>Simplify</c> applied to the combined boolean query.</returns>
 protected virtual Query NewFuzzyQuery(string text, int fuzziness)
 {
     BooleanQuery bq = new BooleanQuery(true);
     foreach (var entry in weights)
     {
         // A freshly constructed object can never be null, so the clause is added unconditionally.
         Query q = new FuzzyQuery(new Term(entry.Key, text), fuzziness);
         q.Boost = entry.Value;
         bq.Add(q, BooleanClause.Occur.SHOULD);
     }
     return Simplify(bq);
 }
        /// <summary>
        /// Rebuilds the given fuzzy query around the result of <c>CopyTerm</c> and hands the
        /// new query to <c>BuildQuery</c>.
        /// </summary>
        /// <param name="fuzzyQuery">Query whose term, minimum similarity and prefix length are reused.</param>
        /// <returns>The result of <c>BuildQuery</c>, or <c>null</c> when <c>CopyTerm</c> returns <c>null</c>.</returns>
        private ParameterizedSql BuildFuzzy(FuzzyQuery fuzzyQuery)
        {
            Term copiedTerm = CopyTerm(fuzzyQuery.Term);
            if (copiedTerm == null)
            {
                return null;
            }

            FuzzyQuery rebuilt = new FuzzyQuery(copiedTerm, fuzzyQuery.MinSimilarity, fuzzyQuery.PrefixLength);
            return BuildQuery(rebuilt);
        }
        /// <summary>
        /// Checks hit counts, scoring order and the effect of <c>BooleanQuery.MaxClauseCount</c>
        /// on <c>FuzzyQuery</c> against a small in-memory index of five-letter terms, using the
        /// default minimum similarity and prefix lengths 0 to 6.
        /// </summary>
        public virtual void TestFuzziness()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true,
                                                 IndexWriter.MaxFieldLength.LIMITED);
            foreach (string indexedTerm in new[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" })
            {
                AddDoc(indexedTerm, writer);
            }
            writer.Optimize();
            writer.Close();
            IndexSearcher searcher = new IndexSearcher(directory, true);

            FuzzyQuery query;
            ScoreDoc[] hits;

            // Query identical to an indexed term; the array index is the prefix length.
            int[] exactTermHitCounts = { 3, 3, 3, 3, 2, 1, 1 };
            for (int prefixLength = 0; prefixLength < exactTermHitCounts.Length; prefixLength++)
            {
                query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, prefixLength);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(exactTermHitCounts[prefixLength], hits.Length);
            }

            // test scoring
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "3 documents should match");
            string[] order = { "bbbbb", "abbbb", "aabbb" };
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                Assert.AreEqual(order[i], term);
            }

            // test BooleanQuery.maxClauseCount
            // The static limit is restored in the finally block so other tests are unaffected.
            int savedClauseCount = BooleanQuery.MaxClauseCount;
            try
            {
                BooleanQuery.MaxClauseCount = 2;
                // This query would normally return 3 documents, because 3 terms match (see above):
                query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(2, hits.Length, "only 2 documents should match");
                order = new[] { "bbbbb", "abbbb" };
                for (int i = 0; i < hits.Length; i++)
                {
                    string term = searcher.Doc(hits[i].Doc).Get("field");
                    Assert.AreEqual(order[i], term);
                }
            }
            finally
            {
                BooleanQuery.MaxClauseCount = savedClauseCount;
            }

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
            // edit distance to "aaaaa" = 3
            query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // Expected hits, in rank order. Assertions below pass (expected, actual) so that
            // failure messages report the two values the right way round.
            string[] closestThree = { "aaaaa", "aaaab", "aaabb" };
            string[] closestTwo = { "aaaaa", "aaaab" };
            string[] onlyD = { "ddddd" };
            string[] noHits = new string[0];

            // query identical to a word in the index (default allows for up to two edits):
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(closestThree.Length, hits.Length);
            for (int i = 0; i < closestThree.Length; i++)
            {
                Assert.AreEqual(closestThree[i], searcher.Doc(hits[i].Doc).Get("field"));
            }

            // query similar to a word in the index, first without and then with a prefix;
            // the array index is the prefix length.
            string[][] similarTermExpected = { closestThree, closestThree, closestThree, closestThree, closestTwo, noHits };
            for (int prefixLength = 0; prefixLength < similarTermExpected.Length; prefixLength++)
            {
                string[] expected = similarTermExpected[prefixLength];
                query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, prefixLength);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(expected.Length, hits.Length);
                for (int i = 0; i < expected.Length; i++)
                {
                    Assert.AreEqual(expected[i], searcher.Doc(hits[i].Doc).Get("field"));
                }
            }

            // "ddddX" differs from "ddddd" only in the last character, so it matches until the
            // required prefix covers that character (prefix length 5).
            string[][] lastCharDiffersExpected = { onlyD, onlyD, onlyD, onlyD, onlyD, noHits };
            for (int prefixLength = 0; prefixLength < lastCharDiffersExpected.Length; prefixLength++)
            {
                string[] expected = lastCharDiffersExpected[prefixLength];
                query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, prefixLength);
                hits = searcher.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(expected.Length, hits.Length);
                for (int i = 0; i < expected.Length; i++)
                {
                    Assert.AreEqual(expected[i], searcher.Doc(hits[i].Doc).Get("field"));
                }
            }

            // different field = no match:
            query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            searcher.Close();
            directory.Close();
        }
Exemple #49
0
        /// <summary>
        /// Runs a fuzzy search for <paramref name="keyword"/> over the "fileName" and
        /// "fileContent" fields and returns up to 1000 hits.
        /// </summary>
        /// <param name="keyword">Raw keyword; used unmodified as the term text for both fields.</param>
        /// <returns>The <c>TopDocs</c> returned by the searcher obtained from <c>IndexManager.GenerateSearcher()</c>.</returns>
        public static TopDocs Query(string keyword)
        {
            var indexSearcher = IndexManager.GenerateSearcher();

            // Tokenize the keyword.
            // NOTE(review): the tokenized result is never used below; both terms are built from
            // the raw keyword. Confirm whether that is intentional.
            string queryKeyword = GetKeyWordsSplitBySpace(keyword, new PanGuTokenizer());

            // One optional (SHOULD) fuzzy clause per searched field: file name first, then file content.
            var keywordQuery = new BooleanQuery();
            foreach (string fieldName in new[] { "fileName", "fileContent" })
            {
                var fieldTerm = new Term(fieldName, keyword);
                keywordQuery.Add(new FuzzyQuery(fieldTerm), BooleanClause.Occur.SHOULD);
            }

            // The keyword group as a whole is required (MUST), so at least one field has to match.
            var boolQuery = new BooleanQuery();
            boolQuery.Add(keywordQuery, BooleanClause.Occur.MUST);

            return indexSearcher.Search(boolQuery, null, 1000);
        }