/// <summary>
/// Checks that <c>GetHashCode</c> and <c>Equals</c> of <see cref="MultiPhraseQuery"/> agree while
/// two queries hold the same terms and disagree while only one of them has an extra term.
/// </summary>
public virtual void TestHashCodeAndEquals()
{
    MultiPhraseQuery left = new MultiPhraseQuery();
    MultiPhraseQuery right = new MultiPhraseQuery();

    // Two empty queries.
    Assert.AreEqual(left.GetHashCode(), right.GetHashCode());
    Assert.IsTrue(left.Equals(right));
    Assert.AreEqual(left, right);

    // The same single term on both sides.
    Term firstTerm = new Term("someField", "someText");
    left.Add(firstTerm);
    right.Add(firstTerm);
    Assert.AreEqual(left.GetHashCode(), right.GetHashCode());
    Assert.AreEqual(left, right);

    // A second term on the left side only: the queries must now differ.
    Term secondTerm = new Term("someField", "someMoreText");
    left.Add(secondTerm);
    Assert.IsFalse(left.GetHashCode() == right.GetHashCode());
    Assert.IsFalse(left.Equals(right));

    // Once the right side catches up, they match again.
    right.Add(secondTerm);
    Assert.AreEqual(left.GetHashCode(), right.GetHashCode());
    Assert.AreEqual(left, right);
}
/// <summary>
/// Searches with a <see cref="BooleanQuery"/> that requires both a "type" term and a
/// <see cref="MultiPhraseQuery"/> on "body", and expects zero hits without an exception.
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    RAMDirectory indexStore = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>());
    IndexWriter writer = new IndexWriter(indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Add("This is a test", "object", writer);
    Add("a note", "note", writer);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true);

    // The combined query is equivalent to +type:note +body:"a t*"
    MultiPhraseQuery phrase = new MultiPhraseQuery();
    phrase.Add(new Term("body", "a"));
    phrase.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });

    BooleanQuery combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("type", "note")), Occur.MUST);
    combined.Add(phrase, Occur.MUST);

    // exception will be thrown here without fix for #35626:
    ScoreDoc[] hits = searcher.Search(combined, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");

    searcher.Close();
}
/// <summary>
/// Adds two texts that differ only in their last word and checks that a three-position
/// <see cref="MultiPhraseQuery"/> offering both last words reports two total hits.
/// </summary>
public virtual void TestTall()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexStore);
    Add("blueberry chocolate pie", writer);
    Add("blueberry chocolate tart", writer);
    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    query.Add(new Term("body", "blueberry"));
    query.Add(new Term("body", "chocolate"));
    // Last position accepts either ending.
    query.Add(new[] { new Term("body", "pie"), new Term("body", "tart") });
    Assert.AreEqual(2, searcher.Search(query, 1).TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Searches with a <see cref="BooleanQuery"/> that requires both a "type" term and a
/// <see cref="MultiPhraseQuery"/> on "body", and expects zero hits without an exception.
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    RAMDirectory indexStore = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable(0));
    IndexWriter writer = new IndexWriter(indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Add("This is a test", "object", writer);
    Add("a note", "note", writer);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // The combined query is equivalent to +type:note +body:"a t*"
    MultiPhraseQuery phrase = new MultiPhraseQuery();
    phrase.Add(new Term("body", "a"));
    phrase.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });

    BooleanQuery combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);
    combined.Add(phrase, BooleanClause.Occur.MUST);

    // exception will be thrown here without fix for #35626:
    ScoreDoc[] hits = searcher.Search(combined, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");

    searcher.Close();
}
/// <summary>
/// Searches with a <see cref="BooleanQuery"/> that requires both a "type" term and a
/// <see cref="MultiPhraseQuery"/> on "body", and expects zero hits without an exception.
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore);
    Add("this is a test", "object", writer);
    Add("a note", "note", writer);
    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // The combined query is equivalent to +type:note +body:"a t*"
    var phrase = new MultiPhraseQuery();
    phrase.Add(new Term("body", "a"));
    phrase.Add(new[] { new Term("body", "test"), new Term("body", "this") });

    var combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);
    combined.Add(phrase, BooleanClause.Occur.MUST);

    // exception will be thrown here without fix for #35626:
    ScoreDoc[] hits = searcher.Search(combined, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Runs a <see cref="MultiPhraseQuery"/> whose second position only offers the term "nope",
/// expects zero total hits, and calls <c>Explain</c> to make sure it does not throw.
/// </summary>
public virtual void TestNoDocs()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexStore);
    Add("a note", "note", writer);

    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    query.Add(new Term("body", "a"));
    // Both alternatives are the same term on purpose.
    query.Add(new[] { new Term("body", "nope"), new Term("body", "nope") });
    Assert.AreEqual(0, searcher.Search(query, null, 1).TotalHits, "Wrong number of hits");

    // just make sure no exc:
    searcher.Explain(query, 0);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Installs a custom similarity on the searcher and checks that the weight created for a
/// two-position <see cref="MultiPhraseQuery"/> reports 10 * 10 as its value for normalization.
/// </summary>
public virtual void TestCustomIDF()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexStore);
    Add("this is a test", "object", writer);
    Add("a note", "note", writer);

    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new DefaultSimilarityAnonymousClass(this);

    var phrase = new MultiPhraseQuery();
    phrase.Add(new[] { new Term("body", "this"), new Term("body", "that") });
    phrase.Add(new Term("body", "is"));

    Weight phraseWeight = phrase.CreateWeight(searcher);
    Assert.AreEqual(10f * 10f, phraseWeight.GetValueForNormalization(), 0.001f);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Installs a custom similarity on the searcher and checks that the weight created for a
/// two-position <see cref="MultiPhraseQuery"/> reports 10 * 10 as its value for normalization.
/// </summary>
public virtual void TestCustomIDF()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore);
    Add("this is a test", "object", writer);
    Add("a note", "note", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(this);

    var phrase = new MultiPhraseQuery();
    phrase.Add(new[] { new Term("body", "this"), new Term("body", "that") });
    phrase.Add(new Term("body", "is"));

    Weight phraseWeight = phrase.CreateWeight(searcher);
    Assert.AreEqual(10f * 10f, phraseWeight.ValueForNormalization, 0.001f);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Sloppy <see cref="MultiPhraseQuery"/> (slop 6) in which the term "a" appears at both positions;
/// expects one total hit.
/// </summary>
/// <remarks>LUCENE-3821 fixes sloppy phrase scoring, except for this known problem.</remarks>
public virtual void TestMultiSloppyWithRepeats()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexStore);
    Add("a b c d e f g h i k", writer);
    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    // this will fail, when the scorer would propagate [a] rather than [a,b],
    query.Add(new[] { new Term("body", "a"), new Term("body", "b") });
    query.Add(new[] { new Term("body", "a") });
    query.Slop = 6;

    // should match on "a b"
    Assert.AreEqual(1, searcher.Search(query, 1).TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Two-position <see cref="MultiPhraseQuery"/>: ("w1" | "xx") followed by ("w2" | "w3"),
/// checked through <c>Qtest</c> against the expected ids 0, 1, 2 and 3.
/// </summary>
public virtual void TestMPQ3()
{
    string[] firstPosition = { "w1", "xx" };
    string[] secondPosition = { "w2", "w3" };
    int[] expected = { 0, 1, 2, 3 };

    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(Ta(firstPosition));
    query.Add(Ta(secondPosition));
    Qtest(query, expected);
}
/// <summary>
/// Two-position <see cref="MultiPhraseQuery"/>: "w1" followed by ("w2" | "w3"),
/// checked through <c>Qtest</c> against the expected ids 0, 1 and 3.
/// </summary>
public virtual void TestMPQ2()
{
    string[] firstPosition = { "w1" };
    string[] secondPosition = { "w2", "w3" };
    int[] expected = { 0, 1, 3 };

    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(Ta(firstPosition));
    query.Add(Ta(secondPosition));
    Qtest(query, expected);
}
/// <summary>
/// Two-position <see cref="MultiPhraseQuery"/> with one term per position ("w1" then "w2"),
/// checked through <c>Qtest</c> against the single expected id 0.
/// </summary>
public virtual void TestMPQ4()
{
    string[] firstPosition = { "w1" };
    string[] secondPosition = { "w2" };
    int[] expected = { 0 };

    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(Ta(firstPosition));
    query.Add(Ta(secondPosition));
    Qtest(query, expected);
}
/// <summary>
/// Indexes five short "body" texts, collects the indexed "body" terms that start with "pi",
/// and uses them as the second position of two <see cref="MultiPhraseQuery"/> instances:
/// "blueberry pi*" (2 hits expected) and "strawberry pi*" (0 hits expected).
/// </summary>
public virtual void TestPhrasePrefix()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // One document per text, added in this order.
    string[] bodies =
    {
        "blueberry pie",
        "blueberry strudel",
        "blueberry pizza",
        "blueberry chewing gum",
        "piccadilly circus"
    };
    foreach (string body in bodies)
    {
        Document doc = new Document();
        doc.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);
    try
    {
        //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
        MultiPhraseQuery query1 = new MultiPhraseQuery();
        //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
        MultiPhraseQuery query2 = new MultiPhraseQuery();
        query1.Add(new Term("body", "blueberry"));
        query2.Add(new Term("body", "strawberry"));

        System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
        IndexReader ir = IndexReader.Open(indexStore);
        try
        {
            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string prefix = "pi";
            TermEnum te = ir.Terms(new Term("body", prefix + "*"));
            do
            {
                Term current = te.Term();
                // Index terms are compared ordinally, never by the current culture.
                if (current != null && current.Text().StartsWith(prefix, System.StringComparison.Ordinal))
                {
                    termsWithPrefix.Add(current);
                }
            }
            while (te.Next());

            query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).scoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).scoreDocs;
            Assert.AreEqual(0, result.Length);
        }
        finally
        {
            // Release the reader even when an assertion above fails.
            ir.Close();
        }
    }
    finally
    {
        searcher.Close();
    }
}
/// <summary>
/// Two-position <see cref="MultiPhraseQuery"/> ("w1" then "w2") with slop 1,
/// checked through <c>Qtest</c> against the expected ids 0, 1 and 2.
/// </summary>
public virtual void TestMPQ5()
{
    string[] firstPosition = { "w1" };
    string[] secondPosition = { "w2" };
    int[] expected = { 0, 1, 2 };

    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(Ta(firstPosition));
    query.Add(Ta(secondPosition));
    query.SetSlop(1);
    Qtest(query, expected);
}
/// <summary>
/// Two-position <see cref="MultiPhraseQuery"/> ("w1" then "w2") with slop 1 and a boost of 0,
/// checked through <c>Bqtest</c> against the expected ids 0, 1 and 2.
/// </summary>
public virtual void TestMPQ7()
{
    string[] firstPosition = { "w1" };
    string[] secondPosition = { "w2" };
    int[] expected = { 0, 1, 2 };

    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(Ta(firstPosition));
    query.Add(Ta(secondPosition));
    query.Slop = 1;
    query.Boost = 0.0f;
    Bqtest(query, expected);
}
/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> with explicit positions -1, 0 and 1 and passes it to
/// <c>DoTestZeroPosIncrSloppy</c> with expected values 0, 0 and 1 for the initial slop, slop 1 and slop 2.
/// </summary>
public virtual void TestZeroPosIncrSloppyParsedAnd()
{
    Term[] atMinusOne = { new Term("field", "a"), new Term("field", "1") };
    Term[] atZero = { new Term("field", "b"), new Term("field", "1") };
    Term[] atOne = { new Term("field", "c") };

    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(atMinusOne, -1);
    query.Add(atZero, 0);
    query.Add(atOne, 1);

    // Slop is left untouched for the first check.
    DoTestZeroPosIncrSloppy(query, 0);

    query.Slop = 1;
    DoTestZeroPosIncrSloppy(query, 0);

    query.Slop = 2;
    DoTestZeroPosIncrSloppy(query, 1);
}
/// <summary>
/// Asserts, via <c>AssertSubsetOf</c>, the subset relation between an exact <see cref="PhraseQuery"/>
/// (t1, then t2 at position 2) and a <see cref="MultiPhraseQuery"/> that additionally allows a third
/// random term at position 2.
/// </summary>
public virtual void TestExactPhraseVersusMultiPhraseWithHoles()
{
    // Random terms are drawn in the same order as before: t1, t2, then t3.
    Term first = RandomTerm();
    Term second = RandomTerm();

    PhraseQuery exact = new PhraseQuery();
    exact.Add(first);
    exact.Add(second, 2);

    Term alternative = RandomTerm();

    MultiPhraseQuery multi = new MultiPhraseQuery();
    multi.Add(first);
    multi.Add(new Term[] { second, alternative }, 2);

    AssertSubsetOf(exact, multi);
}
/// <summary>
/// Setting <c>Slop</c> to a negative value on a <see cref="MultiPhraseQuery"/> must throw
/// <see cref="ArgumentOutOfRangeException"/>.
/// </summary>
public virtual void TestNegativeSlop()
{
    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));

    bool rejected = false;
    try
    {
        query.Slop = -2;
    }
    catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
    {
        // expected exception
        rejected = true;
    }

    if (!rejected)
    {
        Assert.Fail("didn't get expected exception");
    }
}
/// <summary>
/// Regression test for bug 33161 (now fixed): a <see cref="BooleanQuery"/> with two required
/// clauses, one of which is a <see cref="MultiPhraseQuery"/> holding exactly one term array,
/// must search without throwing and return two hits.
/// </summary>
public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
{
    // In order to cause the bug, the outer query must have more than one term
    // and all terms required.
    // The contained PhraseMultiQuery must contain exactly one term array.

    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Add("blueberry pie", writer);
    Add("blueberry chewing gum", writer);
    Add("blue raspberry pie", writer);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true);

    // The combined query is equivalent to +body:pie +body:"blue*"
    MultiPhraseQuery singleArrayPhrase = new MultiPhraseQuery();
    singleArrayPhrase.Add(new Term[] { new Term("body", "blueberry"), new Term("body", "blue") });

    BooleanQuery combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("body", "pie")), Occur.MUST);
    combined.Add(singleArrayPhrase, Occur.MUST);

    // exception will be thrown here without fix
    ScoreDoc[] hits = searcher.Search(combined, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "Wrong number of hits");

    searcher.Close();
}
/// <summary>
/// Setting <c>Slop</c> to a negative value on a <see cref="MultiPhraseQuery"/> must throw
/// <see cref="System.ArgumentException"/> (or a subclass of it).
/// </summary>
public virtual void TestNegativeSlop()
{
    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    try
    {
        query.Slop = -2;
        Assert.Fail("didn't get expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected exception; the instance itself is not inspected, so no variable is declared
    }
}
/// <summary>
/// Setting <c>Slop</c> to a negative value on a <see cref="MultiPhraseQuery"/> must throw
/// <see cref="ArgumentException"/> (or a subclass of it).
/// </summary>
public virtual void TestNegativeSlop()
{
    MultiPhraseQuery phrase = new MultiPhraseQuery();
    phrase.Add(new Term("field", "two"));
    phrase.Add(new Term("field", "one"));

    try
    {
        phrase.Slop = -2;
        Assert.Fail("didn't get expected exception");
    }
    catch (ArgumentException)
    {
        // expected exception
    }
}
/// <summary>
/// Indexes five short "body" texts, collects the indexed "body" terms that start with "pi",
/// and uses them as the second position of two <see cref="MultiPhraseQuery"/> instances:
/// "blueberry pi*" (2 hits expected) and "strawberry pi*" (0 hits expected).
/// </summary>
public virtual void TestPhrasePrefix()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // One document per text, added in this order.
    string[] texts =
    {
        "blueberry pie",
        "blueberry strudel",
        "blueberry pizza",
        "blueberry chewing gum",
        "piccadilly circus"
    };
    for (int i = 0; i < texts.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("body", texts[i], Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);
    try
    {
        //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
        MultiPhraseQuery query1 = new MultiPhraseQuery();
        //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
        MultiPhraseQuery query2 = new MultiPhraseQuery();
        query1.Add(new Term("body", "blueberry"));
        query2.Add(new Term("body", "strawberry"));

        System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
        IndexReader ir = IndexReader.Open(indexStore);
        try
        {
            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string prefix = "pi";
            TermEnum te = ir.Terms(new Term("body", prefix + "*"));
            do
            {
                Term candidate = te.Term();
                // Ordinal comparison: index terms must not be matched with culture rules.
                if (candidate != null && candidate.Text().StartsWith(prefix, System.StringComparison.Ordinal))
                {
                    termsWithPrefix.Add(candidate);
                }
            }
            while (te.Next());

            Term[] prefixTerms = (Term[])termsWithPrefix.ToArray(typeof(Term));
            query1.Add(prefixTerms);
            // A separate array per query, as in the original test.
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).scoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).scoreDocs;
            Assert.AreEqual(0, result.Length);
        }
        finally
        {
            // Previously the reader was left open.
            ir.Close();
        }
    }
    finally
    {
        // Previously the searcher was left open.
        searcher.Close();
    }
}
/// <summary>
/// Exact (no slop set) <see cref="MultiPhraseQuery"/> in which the term "a" is offered at both
/// position 0 and position 2; expects one total hit.
/// </summary>
public virtual void TestMultiExactWithRepeats()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore);
    Add("a b c d e f g h i k", writer);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    query.Add(new[] { new Term("body", "a"), new Term("body", "d") }, 0);
    query.Add(new[] { new Term("body", "a"), new Term("body", "f") }, 2);

    // NOTE(review): the inherited comment said this should match on "a b"; with positions 0 and 2
    // the match presumably comes from "d" ... "f" instead - confirm against the helper Add().
    Assert.AreEqual(1, searcher.Search(query, 1).TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Adds two texts that differ only in their last word and checks that a three-position
/// <see cref="MultiPhraseQuery"/> offering both last words reports two total hits.
/// </summary>
public virtual void TestTall()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore);
    Add("blueberry chocolate pie", writer);
    Add("blueberry chocolate tart", writer);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    query.Add(new Term("body", "blueberry"));
    query.Add(new Term("body", "chocolate"));
    // Last position accepts either ending.
    query.Add(new[] { new Term("body", "pie"), new Term("body", "tart") });
    Assert.AreEqual(2, searcher.Search(query, 1).TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Runs a <see cref="MultiPhraseQuery"/> whose second position only offers the term "nope",
/// expects zero total hits, and calls <c>Explain</c> to make sure it does not throw.
/// </summary>
public virtual void TestNoDocs()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random, indexStore, Similarity, TimeZone);
    Add("a note", "note", writer);

    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    query.Add(new Term("body", "a"));
    // Both alternatives are the same term on purpose.
    query.Add(new[] { new Term("body", "nope"), new Term("body", "nope") });
    Assert.AreEqual(0, searcher.Search(query, null, 1).TotalHits, "Wrong number of hits");

    // just make sure no exc:
    searcher.Explain(query, 0);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Exact (no slop set) <see cref="MultiPhraseQuery"/> in which the term "a" is offered at both
/// position 0 and position 2; expects one total hit.
/// </summary>
public virtual void TestMultiExactWithRepeats()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexStore);
    Add("a b c d e f g h i k", writer);
    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    query.Add(new[] { new Term("body", "a"), new Term("body", "d") }, 0);
    query.Add(new[] { new Term("body", "a"), new Term("body", "f") }, 2);

    // NOTE(review): the inherited comment said this should match on "a b"; with positions 0 and 2
    // the match presumably comes from "d" ... "f" instead - confirm against the helper Add().
    Assert.AreEqual(1, searcher.Search(query, 1).TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> from <c>INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN</c>, one
/// term array per token group, and passes it to <c>DoTestZeroPosIncrSloppy</c> with an expected
/// value of 0 both before and after setting slop to 2.
/// </summary>
public virtual void TestZeroPosIncrSloppyMpqAndOrNoMatch()
{
    var query = new MultiPhraseQuery();
    foreach (Token[] group in INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN)
    {
        // AND logic in pos, OR across lines.
        // The position is the first token's position increment minus one.
        query.Add(TapTerms(group), group[0].PositionIncrement - 1);
    }

    DoTestZeroPosIncrSloppy(query, 0);

    query.Slop = 2;
    DoTestZeroPosIncrSloppy(query, 0);
}
/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> with one single-term array per token of
/// <c>INCR_0_QUERY_TOKENS_AND</c>, positioned by the running sum of position increments
/// (starting from -1), and passes it to <c>DoTestZeroPosIncrSloppy</c> with expected values
/// 0, 0 and 1 for the initial slop, slop 1 and slop 2.
/// </summary>
public virtual void TestZeroPosIncrSloppyMpqAnd()
{
    var query = new MultiPhraseQuery();
    int position = -1;
    foreach (Token token in INCR_0_QUERY_TOKENS_AND)
    {
        position += token.PositionIncrement;
        // AND logic: every position holds exactly one term.
        var single = new[] { new Term("field", token.ToString()) };
        query.Add(single, position);
    }

    DoTestZeroPosIncrSloppy(query, 0);

    query.Slop = 1;
    DoTestZeroPosIncrSloppy(query, 0);

    query.Slop = 2;
    DoTestZeroPosIncrSloppy(query, 1);
}
/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> on "field" from a seeded random generator, so the same
/// seed drives the same sequence of random draws.
/// </summary>
/// <param name="seed">Seed passed to <c>J2N.Randomizer</c>.</param>
/// <returns>
/// A query with a random number of positions; each position holds a random number of single-character
/// terms, and positions advance by a random step (all bounds as passed to <c>TestUtil.NextInt32</c>).
/// </returns>
private MultiPhraseQuery RandomPhraseQuery(long seed)
{
    Random rng = new J2N.Randomizer(seed);
    var query = new MultiPhraseQuery();

    // Draw order matters for reproducibility: length, then per clause depth, letters, step.
    int clauseCount = TestUtil.NextInt32(rng, 2, 5);
    int position = 0;
    for (int clause = 0; clause < clauseCount; clause++)
    {
        var alternatives = new Term[TestUtil.NextInt32(rng, 1, 3)];
        for (int slot = 0; slot < alternatives.Length; slot++)
        {
            char letter = (char)TestUtil.NextInt32(rng, 'a', 'z');
            alternatives[slot] = new Term("field", letter.ToString());
        }

        query.Add(alternatives, position);
        position += TestUtil.NextInt32(rng, 1, 3);
    }

    return query;
}
/// <summary>
/// Regression test for bug 33161 (now fixed): a <see cref="BooleanQuery"/> with two required
/// clauses, one of which is a <see cref="MultiPhraseQuery"/> holding exactly one term array,
/// must search without throwing and return two hits.
/// </summary>
public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
{
    // In order to cause the bug, the outer query must have more than one term
    // and all terms required.
    // The contained PhraseMultiQuery must contain exactly one term array.

    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, indexStore);
    Add("blueberry pie", writer);
    Add("blueberry chewing gum", writer);
    Add("blue raspberry pie", writer);

    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);

    // The combined query is equivalent to +body:pie +body:"blue*"
    var singleArrayPhrase = new MultiPhraseQuery();
    singleArrayPhrase.Add(new[] { new Term("body", "blueberry"), new Term("body", "blue") });

    var combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("body", "pie")), Occur.MUST);
    combined.Add(singleArrayPhrase, Occur.MUST);

    // exception will be thrown here without fix
    ScoreDoc[] hits = searcher.Search(combined, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "Wrong number of hits");

    // just make sure no exc:
    searcher.Explain(combined, 0);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> from a <see cref="MultiPhraseQueryNode"/>.
/// </summary>
/// <remarks>
/// The terms of the child nodes are grouped by their position increment; each group is added to
/// the query as one term array at that position, in ascending position order.
/// </remarks>
/// <param name="queryNode">The node to convert; it is cast to <see cref="MultiPhraseQueryNode"/>.</param>
/// <returns>The resulting query; it has no terms when the node reports no children.</returns>
public virtual Query Build(IQueryNode queryNode)
{
    MultiPhraseQueryNode phraseNode = (MultiPhraseQueryNode)queryNode;
    MultiPhraseQuery phraseQuery = new MultiPhraseQuery();

    IList<IQueryNode> children = phraseNode.GetChildren();
    if (children == null)
    {
        return phraseQuery;
    }

    // Sorted by key so that positions are added to the query in ascending order.
    IDictionary<int?, List<Term>> positionTermMap = new SortedDictionary<int?, List<Term>>();

    foreach (IQueryNode child in children)
    {
        FieldQueryNode termNode = (FieldQueryNode)child;
        // Each child carries its TermQuery as a tag under QUERY_TREE_BUILDER_TAGID.
        TermQuery termQuery = (TermQuery)termNode.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);

        // Read the position once and reuse it for both the lookup and the insert.
        int? position = termNode.PositionIncrement;

        List<Term> termList;
        if (!positionTermMap.TryGetValue(position, out termList) || termList == null)
        {
            termList = new List<Term>();
            positionTermMap[position] = termList;
        }

        termList.Add(termQuery.Term);
    }

    // Walk key/value pairs directly instead of enumerating the keys and looking each one up again.
    foreach (KeyValuePair<int?, List<Term>> entry in positionTermMap)
    {
        phraseQuery.Add(entry.Value.ToArray(), (int)entry.Key);
    }

    return phraseQuery;
}
/// <summary>
/// Indexes one document through a custom analyzer, checks the positions reported for the terms
/// "1" and "2", and then runs a series of <see cref="PhraseQuery"/> and <see cref="MultiPhraseQuery"/>
/// searches with implicit and explicit positions, asserting the hit count of each.
/// </summary>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), store, analyzer);
    Document doc = new Document();
    doc.Add(NewTextField("field", "bogus", Field.Store.YES));
    writer.AddDocument(doc);

    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    // first token should be at position 0
    DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
    positions.NextDoc();
    Assert.AreEqual(0, positions.NextPosition());

    // second token should be at position 2
    positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
    positions.NextDoc();
    Assert.AreEqual(2, positions.NextPosition());

    PhraseQuery phrase;

    // "1 2" with default (adjacent) positions finds nothing.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"));
    phrase.Add(new Term("field", "2"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // same as previous, just specify positions explicitly.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"), 0);
    phrase.Add(new Term("field", "2"), 1);
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // specifying correct positions should find the phrase.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"), 0);
    phrase.Add(new Term("field", "2"), 2);
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "3"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"));
    phrase.Add(new Term("field", "4"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // phrase query would find it when correct positions are specified.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"), 0);
    phrase.Add(new Term("field", "4"), 0);
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // phrase query should fail for a non-existing searched term
    // even if another searched term exists at the same searched position.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"), 0);
    phrase.Add(new Term("field", "9"), 0);
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // multi-phrase query should succeed for a non-existing searched term
    // because another searched term exists at the same searched position.
    MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
    multiPhrase.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    Assert.AreEqual(1, searcher.Search(multiPhrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "4"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "4"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    reader.Dispose();
    store.Dispose();
}
/// <summary>
/// Two-position <see cref="MultiPhraseQuery"/>: ("w1" | "w3") followed by "w2", with slop 1,
/// checked through <c>Qtest</c> against the expected ids 0, 1, 2 and 3.
/// </summary>
public virtual void TestMPQ6()
{
    string[] firstPosition = { "w1", "w3" };
    string[] secondPosition = { "w2" };
    int[] expected = { 0, 1, 2, 3 };

    MultiPhraseQuery query = new MultiPhraseQuery();
    query.Add(Ta(firstPosition));
    query.Add(Ta(secondPosition));
    query.Slop = 1;
    Qtest(query, expected);
}
/// <summary>
/// Adds two texts that differ only in their last word and checks that a three-position
/// <see cref="MultiPhraseQuery"/> offering both last words reports two total hits.
/// </summary>
public virtual void TestTall()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("blueberry chocolate pie", writer);
    Add("blueberry chocolate tart", writer);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    var query = new MultiPhraseQuery();
    query.Add(new Term("body", "blueberry"));
    query.Add(new Term("body", "chocolate"));
    // Last position accepts either ending.
    query.Add(new[] { new Term("body", "pie"), new Term("body", "tart") });
    Assert.AreEqual(2, searcher.Search(query, 1).TotalHits);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Indexes one document through a custom analyzer, checks the positions reported for the terms
/// "1" and "2", and then runs a series of <see cref="PhraseQuery"/> and <see cref="MultiPhraseQuery"/>
/// searches with implicit and explicit positions, asserting the hit count of each.
/// </summary>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnalyzerAnonymousClass(this);
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, store, analyzer);
    Document doc = new Document();
    doc.Add(NewTextField("field", "bogus", Field.Store.YES));
    writer.AddDocument(doc);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    // first token should be at position 0
    DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
    positions.NextDoc();
    Assert.AreEqual(0, positions.NextPosition());

    // second token should be at position 2
    positions = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
    positions.NextDoc();
    Assert.AreEqual(2, positions.NextPosition());

    PhraseQuery phrase;

    // "1 2" with default (adjacent) positions finds nothing.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"));
    phrase.Add(new Term("field", "2"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // same as previous, just specify positions explicitly.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"), 0);
    phrase.Add(new Term("field", "2"), 1);
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // specifying correct positions should find the phrase.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "1"), 0);
    phrase.Add(new Term("field", "2"), 2);
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "3"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"));
    phrase.Add(new Term("field", "4"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // phrase query would find it when correct positions are specified.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"), 0);
    phrase.Add(new Term("field", "4"), 0);
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // phrase query should fail for a non-existing searched term
    // even if another searched term exists at the same searched position.
    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"), 0);
    phrase.Add(new Term("field", "9"), 0);
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    // multi-phrase query should succeed for a non-existing searched term
    // because another searched term exists at the same searched position.
    MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
    multiPhrase.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    Assert.AreEqual(1, searcher.Search(multiPhrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "4"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "3"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "4"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(1, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    phrase = new PhraseQuery();
    phrase.Add(new Term("field", "2"));
    phrase.Add(new Term("field", "5"));
    Assert.AreEqual(0, searcher.Search(phrase, null, 1000).ScoreDocs.Length);

    reader.Dispose();
    store.Dispose();
}
/// <summary>
/// Installs a custom similarity on the searcher and checks that the weight created for a
/// two-position <see cref="MultiPhraseQuery"/> reports 10 * 10 as its value for normalization.
/// </summary>
public virtual void TestCustomIDF()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("this is a test", "object", writer);
    Add("a note", "note", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(this);

    var phrase = new MultiPhraseQuery();
    phrase.Add(new[] { new Term("body", "this"), new Term("body", "that") });
    phrase.Add(new Term("body", "is"));

    Weight phraseWeight = phrase.CreateWeight(searcher);
    Assert.AreEqual(10f * 10f, phraseWeight.ValueForNormalization, 0.001f);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Indexes the same canned token stream ("a" with increment 1, then "b" and "c" with increment 0)
/// as two documents and checks that a <see cref="MultiPhraseQuery"/> with both term arrays at
/// position 0 finds both documents with equal scores.
/// </summary>
public virtual void TestZeroPosIncr()
{
    Directory dir = new RAMDirectory();

    // Token texts and their position increments, index-aligned.
    string[] texts = { "a", "b", "c" };
    int[] increments = { 1, 0, 0 };
    var tokens = new Token[3];
    for (int i = 0; i < tokens.Length; i++)
    {
        tokens[i] = new Token();
        tokens[i].Append(texts[i]);
        tokens[i].PositionIncrement = increments[i];
    }

    var writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    // Two documents with identical content.
    for (int copy = 0; copy < 2; copy++)
    {
        var doc = new Document();
        doc.Add(new TextField("field", new CannedTokenStream(tokens)));
        writer.AddDocument(doc);
    }
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var mpq = new MultiPhraseQuery();
    //mpq.setSlop(1);

    // NOTE: not great that if we do the else clause here we
    // get different scores! MultiPhraseQuery counts that
    // phrase as occurring twice per doc (it should be 1, I
    // think?). this is because MultipleTermPositions is able to
    // return the same position more than once (0, in this
    // case):
    if (true)
    {
        mpq.Add(new[] { new Term("field", "b"), new Term("field", "c") }, 0);
        mpq.Add(new[] { new Term("field", "a") }, 0);
    }
    else
    {
        mpq.Add(new[] { new Term("field", "a") }, 0);
        mpq.Add(new[] { new Term("field", "b"), new Term("field", "c") }, 0);
    }

    TopDocs hits = searcher.Search(mpq, 2);
    Assert.AreEqual(2, hits.TotalHits);
    Assert.AreEqual(hits.ScoreDocs[0].Score, hits.ScoreDocs[1].Score, 1e-5);

    /*
    for(int hit=0;hit<hits.TotalHits;hit++) {
      ScoreDoc sd = hits.ScoreDocs[hit];
      System.out.println("  hit doc=" + sd.Doc + " score=" + sd.Score);
    }
    */

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Exercises <see cref="MultiPhraseQuery"/> as a "phrase prefix" query: the terms that
/// share a prefix are collected from the index's "body" field and added as the
/// alternatives for one phrase position. Also checks the string form of the queries,
/// the effect of <c>Slop = 1</c>, and that mixing fields raises
/// <see cref="System.ArgumentException"/>.
/// </summary>
public virtual void TestPhrasePrefix()
{
    Directory indexStore = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("blueberry pie", writer);
    Add("blueberry strudel", writer);
    Add("blueberry pizza", writer);
    Add("blueberry chewing gum", writer);
    Add("bluebird pizza", writer);
    Add("bluebird foobar pizza", writer);
    Add("piccadilly circus", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // search for "blueberry pi*":
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    // search for "strawberry pi*":
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    LinkedList<Term> termsWithPrefix = new LinkedList<Term>();

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    string prefix = "pi";
    TermsEnum te = MultiFields.GetFields(reader).Terms("body").Iterator(null);
    te.SeekCeil(new BytesRef(prefix));
    do
    {
        string s = te.Term().Utf8ToString();
        // Ordinal comparison: index terms are not linguistic text, so the match
        // must not depend on the current culture.
        if (!s.StartsWith(prefix, System.StringComparison.Ordinal))
        {
            break;
        }
        termsWithPrefix.AddLast(new Term("body", s));
    } while (te.Next() != null);

    query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
    Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
    query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
    Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000).ScoreDocs;
    Assert.AreEqual(2, result.Length);
    result = searcher.Search(query2, null, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);

    // search for "blue* pizza":
    MultiPhraseQuery query3 = new MultiPhraseQuery();
    termsWithPrefix.Clear();
    prefix = "blue";
    te.SeekCeil(new BytesRef(prefix));
    do
    {
        // Decode the current term once instead of once for the test and once for the add.
        string s = te.Term().Utf8ToString();
        if (!s.StartsWith(prefix, System.StringComparison.Ordinal))
        {
            // Same early exit as the "pi" loop above: the enumeration is walked
            // forward from the seek point, so the first miss ends the prefix run.
            break;
        }
        termsWithPrefix.AddLast(new Term("body", s));
    } while (te.Next() != null);

    query3.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
    query3.Add(new Term("body", "pizza"));

    result = searcher.Search(query3, null, 1000).ScoreDocs;
    Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
    Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

    // test slop:
    query3.Slop = 1;
    result = searcher.Search(query3, null, 1000).ScoreDocs;

    // just make sure no exc:
    searcher.Explain(query3, 0);

    Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird foobar pizza

    MultiPhraseQuery query4 = new MultiPhraseQuery();
    try
    {
        query4.Add(new Term("field1", "foo"));
        query4.Add(new Term("field2", "foobar"));
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // okay, all terms must belong to the same field
    }

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> on "field" from a <see cref="Random"/>
/// seeded with <paramref name="seed"/>, so the same seed yields the same sequence
/// of random draws.
/// </summary>
/// <param name="seed">Seed for the random generator driving every choice below.</param>
/// <returns>The populated query.</returns>
private MultiPhraseQuery RandomPhraseQuery(int seed)
{
    Random rnd = new Random(seed);
    int slotCount = TestUtil.NextInt(rnd, 2, 5);
    var query = new MultiPhraseQuery();

    int nextPosition = 0;
    for (int slot = 0; slot < slotCount; slot++)
    {
        // Each slot gets its own randomly sized group of single-letter terms.
        var alternatives = new Term[TestUtil.NextInt(rnd, 1, 3)];
        for (int k = 0; k < alternatives.Length; k++)
        {
            char letter = (char)TestUtil.NextInt(rnd, 'a', 'z');
            alternatives[k] = new Term("field", letter.ToString());
        }

        query.Add(alternatives, nextPosition);
        // Advance by a random gap before the next slot.
        nextPosition += TestUtil.NextInt(rnd, 1, 3);
    }

    return query;
}
/// <summary>
/// Searches for "a" followed by a term group containing only "nope" (twice), which
/// the single indexed document does not contain, and asserts zero hits. Also calls
/// <c>Explain</c> to make sure it does not throw.
/// </summary>
public virtual void TestNoDocs()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("a note", "note", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    Term[] missingTerms = { new Term("body", "nope"), new Term("body", "nope") };
    var q = new MultiPhraseQuery();
    q.Add(new Term("body", "a"));
    q.Add(missingTerms);

    TopDocs topDocs = searcher.Search(q, null, 1);
    Assert.AreEqual(0, topDocs.TotalHits, "Wrong number of hits");

    // just make sure no exc:
    searcher.Explain(q, 0);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Sloppy <see cref="MultiPhraseQuery"/> in which the term "a" appears in both
/// positions; asserts that the single indexed document is found.
/// </summary>
/// <remarks>
/// LUCENE-3821 fixes sloppy phrase scoring, except for this known problem.
/// </remarks>
public virtual void TestMultiSloppyWithRepeats()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("a b c d e f g h i k", writer);
    IndexReader r = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(r);

    Term[] firstPosition = { new Term("body", "a"), new Term("body", "b") };
    Term[] secondPosition = { new Term("body", "a") };

    var q = new MultiPhraseQuery();
    // this will fail, when the scorer would propagate [a] rather than [a,b],
    q.Add(firstPosition);
    q.Add(secondPosition);
    q.Slop = 6;

    TopDocs topDocs = searcher.Search(q, 1);
    Assert.AreEqual(1, topDocs.TotalHits); // should match on "a b"

    r.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Combines a required <see cref="TermQuery"/> on "type" with a required
/// <see cref="MultiPhraseQuery"/> on "body" and asserts that the search completes
/// with zero hits (regression check for #35626).
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("this is a test", "object", writer);
    Add("a note", "note", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // this query will be equivalent to +type:note +body:"a t*"
    var typeClause = new TermQuery(new Term("type", "note"));
    var q = new BooleanQuery();
    q.Add(typeClause, BooleanClause.Occur.MUST);

    Term[] secondPosition = { new Term("body", "test"), new Term("body", "this") };
    var trouble = new MultiPhraseQuery();
    trouble.Add(new Term("body", "a"));
    trouble.Add(secondPosition);
    q.Add(trouble, BooleanClause.Occur.MUST);

    // exception will be thrown here without fix for #35626:
    TopDocs topDocs = searcher.Search(q, null, 1000);
    ScoreDoc[] hits = topDocs.ScoreDocs;
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Regression test for bug 33161 (now fixed).
/// </summary>
/// <remarks>
/// In order to cause the bug, the outer query must have more than one term and
/// all terms required. The contained <see cref="MultiPhraseQuery"/> must contain
/// exactly one term array.
/// </remarks>
public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("blueberry pie", writer);
    Add("blueberry chewing gum", writer);
    Add("blue raspberry pie", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // this query will be equivalent to +body:pie +body:"blue*"
    var pieClause = new TermQuery(new Term("body", "pie"));
    var q = new BooleanQuery();
    q.Add(pieClause, BooleanClause.Occur.MUST);

    // The one and only term array of the nested query.
    Term[] blueTerms = { new Term("body", "blueberry"), new Term("body", "blue") };
    var trouble = new MultiPhraseQuery();
    trouble.Add(blueTerms);
    q.Add(trouble, BooleanClause.Occur.MUST);

    // exception will be thrown here without fix
    TopDocs topDocs = searcher.Search(q, null, 1000);
    ScoreDoc[] hits = topDocs.ScoreDocs;
    Assert.AreEqual(2, hits.Length, "Wrong number of hits");

    // just make sure no exc:
    searcher.Explain(q, 0);

    writer.Dispose();
    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Combines a required <see cref="TermQuery"/> on "type" with a required
/// <see cref="MultiPhraseQuery"/> on "body" over a <see cref="RAMDirectory"/> index
/// and asserts that the search completes with zero hits (regression check for #35626).
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    RAMDirectory indexStore = new RAMDirectory();

    // The analyzer is constructed with an empty string set.
    StandardAnalyzer analyzer = new StandardAnalyzer(
        Util.Version.LUCENE_CURRENT,
        Support.Compatibility.SetFactory.CreateHashSet<string>());
    IndexWriter writer = new IndexWriter(indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Add("This is a test", "object", writer);
    Add("a note", "note", writer);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true);

    // This query will be equivalent to +type:note +body:"a t*"
    TermQuery typeClause = new TermQuery(new Term("type", "note"));
    BooleanQuery q = new BooleanQuery();
    q.Add(typeClause, Occur.MUST);

    Term[] secondPosition = { new Term("body", "test"), new Term("body", "this") };
    MultiPhraseQuery trouble = new MultiPhraseQuery();
    trouble.Add(new Term("body", "a"));
    trouble.Add(secondPosition);
    q.Add(trouble, Occur.MUST);

    // exception will be thrown here without fix for #35626:
    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");

    searcher.Close();
}
/// <summary>
/// Runs <paramref name="text"/> through <paramref name="analyzer"/> and turns the
/// resulting tokens into a query on <paramref name="field"/>. Tokens are grouped by
/// position: a token with a positive position increment opens a new group, any
/// other token joins the current group.
/// </summary>
/// <param name="analyzer">Analyzer that produces the token stream.</param>
/// <param name="field">Field name passed to the analyzer and used for every term.</param>
/// <param name="text">Text to analyze.</param>
/// <returns>
/// A <see cref="TermQuery"/> when there is exactly one group holding one term; a
/// <see cref="PhraseQuery"/> when every group holds exactly one term (this includes
/// the case of no groups at all); otherwise a <see cref="MultiPhraseQuery"/> with
/// one term array per group.
/// </returns>
private static Query ExecuteAnalyzer(Analyzer analyzer, string field, string text)
{
    TokenStream tokenStream = analyzer.TokenStream(field, new StringReader(text));
    ITermAttribute termAttribute = tokenStream.AddAttribute<ITermAttribute>();
    IPositionIncrementAttribute increment = tokenStream.AddAttribute<IPositionIncrementAttribute>();

    var positions = new List<List<Term>>();
    List<Term> currentPosition = null;
    while (tokenStream.IncrementToken())
    {
        if (increment.PositionIncrement > 0)
        {
            currentPosition = new List<Term>();
            positions.Add(currentPosition);
        }

        if (currentPosition == null)
        {
            // No group has been opened yet, so this token is dropped.
            continue;
        }

        currentPosition.Add(new Term(field, termAttribute.Term));
    }

    if (positions.Count == 1 && positions[0].Count == 1)
    {
        return new TermQuery(positions[0][0]);
    }

    // Every group receives a term as soon as it is created, so "total number of
    // terms equals number of groups" is the same as "each group has one term".
    bool oneTermPerPosition = positions.All(group => group.Count == 1);
    if (oneTermPerPosition)
    {
        var phraseQuery = new PhraseQuery();
        foreach (List<Term> group in positions)
        {
            phraseQuery.Add(group[0]);
        }
        return phraseQuery;
    }

    var multiPhraseQuery = new MultiPhraseQuery();
    foreach (List<Term> group in positions)
    {
        multiPhraseQuery.Add(group.ToArray());
    }
    return multiPhraseQuery;
}
/// <summary>
/// Builds two <see cref="MultiPhraseQuery"/> instances whose second position is the
/// set of "body" terms starting with "pi", one led by "blueberry" and one by
/// "strawberry", and asserts they return 2 and 0 hits respectively.
/// </summary>
public virtual void TestPhrasePrefix()
{
    Directory indexStore = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();
    Document doc4 = new Document();
    Document doc5 = new Document();
    doc1.Add(NewTextField("body", "blueberry pie", Field.Store.YES));
    doc2.Add(NewTextField("body", "blueberry strudel", Field.Store.YES));
    doc3.Add(NewTextField("body", "blueberry pizza", Field.Store.YES));
    doc4.Add(NewTextField("body", "blueberry chewing gum", Field.Store.YES));
    doc5.Add(NewTextField("body", "piccadilly circus", Field.Store.YES));
    writer.AddDocument(doc1);
    writer.AddDocument(doc2);
    writer.AddDocument(doc3);
    writer.AddDocument(doc4);
    writer.AddDocument(doc5);
    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    MultiPhraseQuery query1 = new MultiPhraseQuery();
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    LinkedList<Term> termsWithPrefix = new LinkedList<Term>();

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    string prefix = "pi";
    TermsEnum te = MultiFields.GetFields(reader).Terms("body").Iterator(null);
    te.SeekCeil(new BytesRef(prefix));
    do
    {
        string s = te.Term().Utf8ToString();
        // Ordinal comparison: index terms are not linguistic text, so the match
        // must not depend on the current culture.
        if (s.StartsWith(prefix, System.StringComparison.Ordinal))
        {
            termsWithPrefix.AddLast(new Term("body", s));
        }
        else
        {
            break;
        }
    } while (te.Next() != null);

    query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
    query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000).ScoreDocs;
    Assert.AreEqual(2, result.Length);

    result = searcher.Search(query2, null, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);

    reader.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Combines a required <see cref="TermQuery"/> on "type" with a required
/// <see cref="MultiPhraseQuery"/> on "body" over a <see cref="RAMDirectory"/> index
/// and asserts that the search completes with zero hits (regression check for #35626).
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    RAMDirectory indexStore = new RAMDirectory();

    // The analyzer is constructed with an empty hashtable.
    StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable(0));
    IndexWriter writer = new IndexWriter(indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Add("This is a test", "object", writer);
    Add("a note", "note", writer);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // This query will be equivalent to +type:note +body:"a t*"
    TermQuery typeClause = new TermQuery(new Term("type", "note"));
    BooleanQuery q = new BooleanQuery();
    q.Add(typeClause, BooleanClause.Occur.MUST);

    Term[] secondPosition = new Term[] { new Term("body", "test"), new Term("body", "this") };
    MultiPhraseQuery trouble = new MultiPhraseQuery();
    trouble.Add(new Term("body", "a"));
    trouble.Add(secondPosition);
    q.Add(trouble, BooleanClause.Occur.MUST);

    // exception will be thrown here without fix for #35626:
    ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");

    searcher.Close();
}
/// <summary>
/// Builds a two-position <see cref="MultiPhraseQuery"/> from "w1" then "w2" and
/// hands it to <c>Qtest</c> together with the array <c>{ 0 }</c>.
/// </summary>
public virtual void TestMPQ4()
{
    System.String[] firstPosition = new System.String[] { "w1" };
    System.String[] secondPosition = new System.String[] { "w2" };

    MultiPhraseQuery q = new MultiPhraseQuery();
    q.Add(Ta(firstPosition));
    q.Add(Ta(secondPosition));

    // Presumably the ids of the documents expected to match; confirm against Qtest.
    int[] expected = new int[] { 0 };
    Qtest(q, expected);
}
/// <summary>
/// Exercises <see cref="MultiPhraseQuery"/> as a "phrase prefix" query: the terms that
/// share a prefix are enumerated from the index and added as the alternatives for
/// one phrase position. Also checks the string form of the queries, the effect of a
/// slop of 1, and that mixing fields raises <see cref="System.ArgumentException"/>.
/// </summary>
public virtual void TestPhrasePrefix()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Add("blueberry pie", writer);
    Add("blueberry strudel", writer);
    Add("blueberry pizza", writer);
    Add("blueberry chewing gum", writer);
    Add("bluebird pizza", writer);
    Add("bluebird foobar pizza", writer);
    Add("piccadilly circus", writer);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // search for "blueberry pi*":
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    // search for "strawberry pi*":
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    IndexReader ir = IndexReader.Open(indexStore);

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    System.String prefix = "pi";
    TermEnum te = ir.Terms(new Term("body", prefix));
    do
    {
        // Ordinal comparison: index terms are not linguistic text, so the match
        // must not depend on the current culture.
        if (te.Term().Text().StartsWith(prefix, System.StringComparison.Ordinal))
        {
            termsWithPrefix.Add(te.Term());
        }
    } while (te.Next());
    // Release the enumeration before the variable is reused below.
    te.Close();

    query1.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
    Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
    query2.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
    Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000).scoreDocs;
    Assert.AreEqual(2, result.Length);
    result = searcher.Search(query2, null, 1000).scoreDocs;
    Assert.AreEqual(0, result.Length);

    // search for "blue* pizza":
    MultiPhraseQuery query3 = new MultiPhraseQuery();
    termsWithPrefix.Clear();
    prefix = "blue";
    te = ir.Terms(new Term("body", prefix));
    do
    {
        if (te.Term().Text().StartsWith(prefix, System.StringComparison.Ordinal))
        {
            termsWithPrefix.Add(te.Term());
        }
    } while (te.Next());
    te.Close();

    query3.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
    query3.Add(new Term("body", "pizza"));

    result = searcher.Search(query3, null, 1000).scoreDocs;
    Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
    Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

    // test slop:
    query3.SetSlop(1);
    result = searcher.Search(query3, null, 1000).scoreDocs;
    Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird foobar pizza

    MultiPhraseQuery query4 = new MultiPhraseQuery();
    try
    {
        query4.Add(new Term("field1", "foo"));
        query4.Add(new Term("field2", "foobar"));
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // okay, all terms must belong to the same field
    }

    searcher.Close();
    // The reader opened for term enumeration is separate from the searcher and
    // was previously never closed.
    ir.Close();
    indexStore.Close();
}
/// <summary>
/// Regression test for bug 33161 (now fixed).
/// </summary>
/// <remarks>
/// In order to cause the bug, the outer query must have more than one term and
/// all terms required. The contained <see cref="MultiPhraseQuery"/> must contain
/// exactly one term array.
/// </remarks>
public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
{
    RAMDirectory indexStore = new RAMDirectory();
    SimpleAnalyzer analyzer = new SimpleAnalyzer();
    IndexWriter writer = new IndexWriter(indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Add("blueberry pie", writer);
    Add("blueberry chewing gum", writer);
    Add("blue raspberry pie", writer);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // This query will be equivalent to +body:pie +body:"blue*"
    TermQuery pieClause = new TermQuery(new Term("body", "pie"));
    BooleanQuery q = new BooleanQuery();
    q.Add(pieClause, BooleanClause.Occur.MUST);

    // The one and only term array of the nested query.
    Term[] blueTerms = new Term[] { new Term("body", "blueberry"), new Term("body", "blue") };
    MultiPhraseQuery trouble = new MultiPhraseQuery();
    trouble.Add(blueTerms);
    q.Add(trouble, BooleanClause.Occur.MUST);

    // exception will be thrown here without fix
    ScoreDoc[] hits = searcher.Search(q, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length, "Wrong number of hits");

    searcher.Close();
}
/// <summary>
/// Checks that two <see cref="MultiPhraseQuery"/> instances compare equal and share
/// a hash code while they hold the same terms, and that both differ once only one
/// of them receives an extra term.
/// </summary>
public virtual void TestHashCodeAndEquals()
{
    MultiPhraseQuery left = new MultiPhraseQuery();
    MultiPhraseQuery right = new MultiPhraseQuery();

    // Both empty.
    Assert.AreEqual(left.GetHashCode(), right.GetHashCode());
    Assert.AreEqual(left, right);

    // Same single term on both sides.
    Term sharedTerm = new Term("someField", "someText");
    left.Add(sharedTerm);
    right.Add(sharedTerm);
    Assert.AreEqual(left.GetHashCode(), right.GetHashCode());
    Assert.AreEqual(left, right);

    // Second term on one side only.
    Term extraTerm = new Term("someField", "someMoreText");
    left.Add(extraTerm);
    bool sameHash = left.GetHashCode() == right.GetHashCode();
    Assert.IsFalse(sameHash);
    bool sameQuery = left.Equals(right);
    Assert.IsFalse(sameQuery);

    // Second term added to the other side as well.
    right.Add(extraTerm);
    Assert.AreEqual(left.GetHashCode(), right.GetHashCode());
    Assert.AreEqual(left, right);
}
/// <summary>
/// Exact (no slop set) <see cref="MultiPhraseQuery"/> in which the term "a" appears
/// in both term groups; asserts that the single indexed document is found.
/// </summary>
public virtual void TestMultiExactWithRepeats()
{
    Directory indexStore = NewDirectory();
    var writer = new RandomIndexWriter(Random(), indexStore, Similarity, TimeZone);
    Add("a b c d e f g h i k", writer);
    IndexReader r = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(r);

    // Group at query position 0: "a" or "d"; group at query position 2: "a" or "f".
    Term[] atPositionZero = { new Term("body", "a"), new Term("body", "d") };
    Term[] atPositionTwo = { new Term("body", "a"), new Term("body", "f") };

    var q = new MultiPhraseQuery();
    q.Add(atPositionZero, 0);
    q.Add(atPositionTwo, 2);

    // In the indexed text "f" comes two words after "d", which fits the two groups.
    TopDocs topDocs = searcher.Search(q, 1);
    Assert.AreEqual(1, topDocs.TotalHits);

    r.Dispose();
    indexStore.Dispose();
}
/// <summary>
/// Analyzes <paramref name="queryText"/> for <paramref name="field"/> and builds a
/// query from the resulting tokens.
/// </summary>
/// <param name="field">Field the terms are created for and the analyzer is run on.</param>
/// <param name="queryText">Raw text to analyze.</param>
/// <returns>
/// <c>null</c> when analysis yields no tokens; a <see cref="TermQuery"/> for a single
/// token; when some token has a position increment of 0, either a
/// <see cref="BooleanQuery"/> of SHOULD clauses (all tokens share one position) or a
/// <see cref="MultiPhraseQuery"/>; otherwise a <see cref="PhraseQuery"/> using
/// <c>phraseSlop</c>.
/// </returns>
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
protected internal virtual Query GetFieldQuery(System.String field, System.String queryText)
{
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
    // Purely local list, so no synchronized wrapper is needed.
    System.Collections.ArrayList v = new System.Collections.ArrayList(10);
    Lucene.Net.Analysis.Token t;
    int positionCount = 0;
    bool severalTokensAtSamePosition = false;

    while (true)
    {
        try
        {
            t = source.Next();
        }
        catch (System.IO.IOException)
        {
            // Best effort: a read failure is treated as end of stream.
            t = null;
        }
        if (t == null)
        {
            break;
        }
        v.Add(t);
        if (t.GetPositionIncrement() != 0)
        {
            positionCount += t.GetPositionIncrement();
        }
        else
        {
            severalTokensAtSamePosition = true;
        }
    }
    try
    {
        source.Close();
    }
    catch (System.IO.IOException)
    {
        // ignore
    }

    if (v.Count == 0)
    {
        return null;
    }

    if (v.Count == 1)
    {
        t = (Lucene.Net.Analysis.Token) v[0];
        return new TermQuery(new Term(field, t.TermText()));
    }

    if (severalTokensAtSamePosition)
    {
        if (positionCount == 1)
        {
            // no phrase query:
            BooleanQuery q = new BooleanQuery(true);
            for (int i = 0; i < v.Count; i++)
            {
                t = (Lucene.Net.Analysis.Token) v[i];
                TermQuery currentQuery = new TermQuery(new Term(field, t.TermText()));
                q.Add(currentQuery, BooleanClause.Occur.SHOULD);
            }
            return q;
        }

        // phrase query:
        MultiPhraseQuery mpq = new MultiPhraseQuery();
        System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
        for (int i = 0; i < v.Count; i++)
        {
            t = (Lucene.Net.Analysis.Token) v[i];
            // Any non-zero increment starts a new position, matching how positions
            // are counted in the loop above. Testing for exactly 1 would fold a
            // token with a larger increment into the previous position's
            // alternatives.
            if (t.GetPositionIncrement() != 0 && multiTerms.Count > 0)
            {
                mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
                multiTerms.Clear();
            }
            multiTerms.Add(new Term(field, t.TermText()));
        }
        mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
        return mpq;
    }

    PhraseQuery phrase = new PhraseQuery();
    phrase.SetSlop(phraseSlop);
    for (int i = 0; i < v.Count; i++)
    {
        phrase.Add(new Term(field, ((Lucene.Net.Analysis.Token) v[i]).TermText()));
    }
    return phrase;
}
/// <summary>
/// Builds a two-position <see cref="MultiPhraseQuery"/> from "w1" then "w2" with a
/// slop of 1 and a boost of 0, and hands it to <c>Bqtest</c> together with the
/// array <c>{ 0, 1, 2 }</c>.
/// </summary>
public virtual void TestMPQ7()
{
    System.String[] firstPosition = new System.String[] { "w1" };
    System.String[] secondPosition = new System.String[] { "w2" };

    MultiPhraseQuery q = new MultiPhraseQuery();
    q.Add(Ta(firstPosition));
    q.Add(Ta(secondPosition));
    q.SetSlop(1);
    q.SetBoost(0.0f);

    // Presumably the ids of the documents expected to match; confirm against Bqtest.
    int[] expected = new int[] { 0, 1, 2 };
    Bqtest(q, expected);
}